/* Archive (tar) member headers found fused into this line, kept for reference:
 *   SuperLU_DIST_5.3.0/0000755013363400111340000000000013234133726012647 5ustar xiaoyessg
 *   SuperLU_DIST_5.3.0/DOC/0000755013363400111340000000000013233431301013241 5ustar xiaoyessg
 *   SuperLU_DIST_5.3.0/SRC/0000755013363400111340000000000013234133023013264 5ustar xiaoyessg
 *   SuperLU_DIST_5.3.0/SRC/dmach_dist.c0000644013363400111340000000560213233431301015531 0ustar xiaoyessg
 */
/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/

/* NOTE(review): the four header names were lost in extraction; the list below
   is a best-guess restoration (only <float.h> is required by the code) --
   confirm against the upstream file. */
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <string.h>

double dmach_dist(char *cmach)
{
/*  -- SuperLU auxiliary routine (version 5.0) --
    This uses C99 standard constants, and is thread safe.

    Must be compiled with -std=c99 flag.


    Purpose
    =======

    DMACH returns double precision machine parameters.

    Arguments
    =========

    CMACH   (input) CHARACTER*1
            Only the first character is examined, so callers may pass a
            descriptive word such as "Epsilon" or "Safe minimum".
            Specifies the value to be returned by DMACH:
            = 'E' or 'e',   DMACH := eps      (DBL_EPSILON * 0.5)
            = 'S' or 's',   DMACH := sfmin
            = 'B' or 'b',   DMACH := base     (FLT_RADIX)
            = 'P' or 'p',   DMACH := eps*base
            = 'N' or 'n',   DMACH := t        (DBL_MANT_DIG)
            = 'R' or 'r',   DMACH := rnd      (value of FLT_ROUNDS)
            = 'M' or 'm',   DMACH := emin     (DBL_MIN_EXP)
            = 'U' or 'u',   DMACH := rmin     (DBL_MIN)
            = 'L' or 'l',   DMACH := emax     (DBL_MAX_EXP)
            = 'O' or 'o',   DMACH := rmax     (DBL_MAX)

            where

            eps   = relative machine precision
            sfmin = safe minimum, such that 1/sfmin does not overflow
            base  = base of the machine
            prec  = eps*base
            t     = number of (base) digits in the mantissa
            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
            emin  = minimum exponent before (gradual) underflow
            rmin  = underflow threshold - base**(emin-1)
            emax  = largest exponent before overflow
            rmax  = overflow threshold  - (base**emax)*(1-eps)

    Return value
    ============

    The requested parameter, or 0.0 when CMACH is NULL, empty, or does not
    start with one of the letters above.

   =====================================================================
*/
    double sfmin, small;
    double rmach = 0.0;	/* defined result for an unrecognized selector */

    if ( !cmach ) return rmach;

    switch ( cmach[0] ) {
    case 'E': case 'e':
	rmach = DBL_EPSILON * 0.5;
	break;
    case 'S': case 's':
	sfmin = DBL_MIN;
	small = 1. / DBL_MAX;
	if (small >= sfmin) {
	    /* Use SMALL plus a bit, to avoid the possibility of rounding
	       causing overflow when computing  1/sfmin. */
	    sfmin = small * (DBL_EPSILON*0.5 + 1.);
	}
	rmach = sfmin;
	break;
    case 'B': case 'b':
	rmach = FLT_RADIX;
	break;
    case 'P': case 'p':
	rmach = DBL_EPSILON * 0.5 * FLT_RADIX;
	break;
    case 'N': case 'n':
	rmach = DBL_MANT_DIG;
	break;
    case 'R': case 'r':
	rmach = FLT_ROUNDS;
	break;
    case 'M': case 'm':
	rmach = DBL_MIN_EXP;
	break;
    case 'U': case 'u':
	rmach = DBL_MIN;
	break;
    case 'L': case 'l':
	rmach = DBL_MAX_EXP;
	break;
    case 'O': case 'o':
	rmach = DBL_MAX;
	break;
    default:
	break;
    }

    return rmach;

} /* end dmach_dist */

/* Archive (tar) member header found fused into this line, kept for reference:
 *   SuperLU_DIST_5.3.0/SRC/zldperm_dist.c0000644013363400111340000001350013233431301016126 0ustar xiaoyessg
 */
/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
 * \brief Finds a row permutation so that the matrix has large entries on the diagonal
 */
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include "superlu_zdefs.h" extern void mc64ad_dist(int_t*, int_t*, int_t*, int_t [], int_t [], double [], int_t*, int_t [], int_t*, int_t[], int_t*, double [], int_t [], int_t []); /*! \brief * *
 * Purpose
 * =======
 *
 *   ZLDPERM finds a row permutation so that the matrix has large
 *   entries on the diagonal.
 *
 * Arguments
 * =========
 *
 * job    (input) int
 *        Control the action. Possible values for JOB are:
 *        = 1 : Compute a row permutation of the matrix so that the
 *              permuted matrix has as many entries on its diagonal as
 *              possible. The values on the diagonal are of arbitrary size.
 *              HSL subroutine MC21A/AD is used for this.
 *        = 2 : Compute a row permutation of the matrix so that the smallest 
 *              value on the diagonal of the permuted matrix is maximized.
 *        = 3 : Compute a row permutation of the matrix so that the smallest
 *              value on the diagonal of the permuted matrix is maximized.
 *              The algorithm differs from the one used for JOB = 2 and may
 *              have quite a different performance.
 *        = 4 : Compute a row permutation of the matrix so that the sum
 *              of the diagonal entries of the permuted matrix is maximized.
 *        = 5 : Compute a row permutation of the matrix so that the product
 *              of the diagonal entries of the permuted matrix is maximized
 *              and vectors to scale the matrix so that the nonzero diagonal 
 *              entries of the permuted matrix are one in absolute value and 
 *              all the off-diagonal entries are less than or equal to one in 
 *              absolute value.
 *        Restriction: 1 <= JOB <= 5.
 *
 * n      (input) int
 *        The order of the matrix.
 *
 * nnz    (input) int
 *        The number of nonzeros in the matrix.
 *
 * adjncy (input) int*, of size nnz
 *        The adjacency structure of the matrix, which contains the row
 *        indices of the nonzeros.
 *
 * colptr (input) int*, of size n+1
 *        The pointers to the beginning of each column in ADJNCY.
 *
 * nzval  (input) doublecomplex*, of size nnz
 *        The nonzero values of the matrix. nzval[k] is the value of
 *        the entry corresponding to adjncy[k].
 *        It is not used if job = 1.
 *
 * perm   (output) int*, of size n
 *        The permutation vector. perm[i] = j means row i in the
 *        original matrix is in row j of the permuted matrix.
 *
 * u      (output) double*, of size n
 *        If job = 5, the natural logarithms of the row scaling factors. 
 *
 * v      (output) double*, of size n
 *        If job = 5, the natural logarithms of the column scaling factors. 
 *        The scaled matrix B has entries b_ij = a_ij * exp(u_i + v_j).
 * 
*/ int zldperm_dist(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[], doublecomplex nzval[], int_t *perm, double u[], double v[]) { int_t i, liw, ldw, num; int_t *iw, icntl[10], info[10]; double *dw; double *nzval_abs = doubleMalloc_dist(nnz); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Enter zldperm_dist()"); #endif liw = 5*n; if ( job == 3 ) liw = 10*n + nnz; if ( !(iw = intMalloc_dist(liw)) ) ABORT("Malloc fails for iw[]"); ldw = 3*n + nnz; if ( !(dw = doubleMalloc_dist(ldw)) ) ABORT("Malloc fails for dw[]"); /* Increment one to get 1-based indexing. */ for (i = 0; i <= n; ++i) ++colptr[i]; for (i = 0; i < nnz; ++i) ++adjncy[i]; #if ( DEBUGlevel>=2 ) printf("LDPERM(): n %d, nnz %d\n", n, nnz); PrintInt10("colptr", n+1, colptr); PrintInt10("adjncy", nnz, adjncy); #endif /* * NOTE: * ===== * * MC64AD assumes that column permutation vector is defined as: * perm(i) = j means column i of permuted A is in column j of original A. * * Since a symmetric permutation preserves the diagonal entries. Then * by the following relation: * P'(A*P')P = P'A * we can apply inverse(perm) to rows of A to get large diagonal entries. * But, since 'perm' defined in MC64AD happens to be the reverse of * SuperLU's definition of permutation vector, therefore, it is already * an inverse for our purpose. We will thus use it directly. * */ mc64id_dist(icntl); /* Suppress error and warning messages. */ icntl[0] = -1; icntl[1] = -1; for (i = 0; i < nnz; ++i) nzval_abs[i] = slud_z_abs1(&nzval[i]); mc64ad_dist(&job, &n, &nnz, colptr, adjncy, nzval_abs, &num, perm, &liw, iw, &ldw, dw, icntl, info); #if ( DEBUGlevel>=2 ) PrintInt10("perm", n, perm); printf(".. After MC64AD info %d\tsize of matching %d\n", info[0], num); #endif if ( info[0] == 1 ) { /* Structurally singular */ printf(".. The last " IFMT " permutations:\n", n-num); PrintInt10("perm", n-num, &perm[num]); } /* Restore to 0-based indexing. 
*/ for (i = 0; i <= n; ++i) --colptr[i]; for (i = 0; i < nnz; ++i) --adjncy[i]; for (i = 0; i < n; ++i) --perm[i]; if ( job == 5 ) for (i = 0; i < n; ++i) { u[i] = dw[i]; v[i] = dw[n+i]; } SUPERLU_FREE(iw); SUPERLU_FREE(dw); SUPERLU_FREE(nzval_abs); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Exit zldperm_dist()"); #endif return (info[0]); } SuperLU_DIST_5.3.0/SRC/pzlangs.c0000644013363400111340000001015513233431301015107 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest value * *
 * File name:	pzlangs.c
 * History:     Modified from lapack routine ZLANGE
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief
 
    Purpose   
    =======   

    PZLANGS returns the value of the one norm, or the Frobenius norm, or 
    the infinity norm, or the element of largest absolute value of a 
    complex matrix A.   

    Description   
    ===========   

    PZLANGS returns the value   

       PZLANGS = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
                 (   
                 ( norm1(A),         NORM = '1', 'O' or 'o'   
                 (   
                 ( normI(A),         NORM = 'I' or 'i'   
                 (   
                 ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   

    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   

    Arguments   
    =========   

    NORM    (input) CHARACTER*1   
            Specifies the value to be returned in PZLANGS as described above.   
    A       (input) SuperMatrix*
            The M by N sparse matrix A. 
    GRID    (input) gridinfo_t*
            The 2D process mesh.
   ===================================================================== 
*/ double pzlangs(char *norm, SuperMatrix *A, gridinfo_t *grid) { /* Local variables */ NRformat_loc *Astore; int_t m_loc; doublecomplex *Aval; int_t i, j, jcol; double value=0., sum; double *rwork; double tempvalue; double *temprwork; Astore = (NRformat_loc *) A->Store; m_loc = Astore->m_loc; Aval = (doublecomplex *) Astore->nzval; if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) { value = 0.; } else if ( strncmp(norm, "M", 1)==0 ) { /* Find max(abs(A(i,j))). */ value = 0.; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) value = SUPERLU_MAX( value, slud_z_abs(&Aval[j]) ); } MPI_Allreduce(&value, &tempvalue, 1, MPI_DOUBLE, MPI_MAX, grid->comm); value = tempvalue; } else if ( strncmp(norm, "O", 1)==0 || *(unsigned char *)norm == '1') { /* Find norm1(A). */ value = 0.; #if 0 for (j = 0; j < A->ncol; ++j) { sum = 0.; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) sum += fabs(Aval[i]); value = SUPERLU_MAX(value,sum); } #else /* XSL ==> */ if ( !(rwork = (double *) doubleCalloc_dist(A->ncol)) ) ABORT("doubleCalloc_dist fails for rwork."); for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; rwork[jcol] += slud_z_abs(&Aval[j]); } } if ( !(temprwork = (double *) doubleCalloc_dist(A->ncol)) ) ABORT("doubleCalloc_dist fails for temprwork."); MPI_Allreduce(rwork, temprwork, A->ncol, MPI_DOUBLE, MPI_SUM, grid->comm); value = 0.; for (j = 0; j < A->ncol; ++j) { value = SUPERLU_MAX(value, temprwork[j]); } SUPERLU_FREE (temprwork); SUPERLU_FREE (rwork); #endif } else if ( strncmp(norm, "I", 1)==0 ) { /* Find normI(A). */ value = 0.; sum = 0.; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) sum += slud_z_abs(&Aval[j]); value = SUPERLU_MAX(value, sum); } MPI_Allreduce(&value, &tempvalue, 1, MPI_DOUBLE, MPI_MAX, grid->comm); value = tempvalue; } else if ( strncmp(norm, "F", 1)==0 || strncmp(norm, "E", 1)==0 ) { /* Find normF(A). 
*/ ABORT("Not implemented."); } else { ABORT("Illegal norm specified."); } return (value); } /* pzlangs */ SuperLU_DIST_5.3.0/SRC/pzlaqgs.c0000644013363400111340000001044713233431301015116 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Equilibrates a general sparse M by N matrix * *
 * File name:	pzlaqgs.c
 * History:     Modified from LAPACK routine ZLAQGE
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief
    Purpose   
    =======   

    PZLAQGS equilibrates a general sparse M by N matrix A using the row
    and column scaling factors in the vectors R and C.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.

    Arguments   
    =========   

    A       (input/output) SuperMatrix*
            On exit, the equilibrated matrix.  See EQUED for the form of 
            the equilibrated matrix. The type of A can be:
	    Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE.
	    
    R       (input) double*, dimension (A->nrow)
            The row scale factors for A.
	    
    C       (input) double*, dimension (A->ncol)
            The column scale factors for A.
	    
    ROWCND  (input) double
            Ratio of the smallest R(i) to the largest R(i).
	    
    COLCND  (input) double
            Ratio of the smallest C(i) to the largest C(i).
	    
    AMAX    (input) double
            Absolute value of largest matrix entry.
	    
    EQUED   (output) char*
            Specifies the form of equilibration that was done.   
            = 'N':  No equilibration   
            = 'R':  Row equilibration, i.e., A has been premultiplied by  
                    diag(R).   
            = 'C':  Column equilibration, i.e., A has been postmultiplied  
                    by diag(C).   
            = 'B':  Both row and column equilibration, i.e., A has been
                    replaced by diag(R) * A * diag(C).   

    Internal Parameters   
    ===================   

    THRESH is a threshold value used to decide if row or column scaling   
    should be done based on the ratio of the row or column scaling   
    factors.  If ROWCND < THRESH, row scaling is done, and if   
    COLCND < THRESH, column scaling is done.   

    LARGE and SMALL are threshold values used to decide if row scaling   
    should be done based on the absolute size of the largest matrix   
    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   

    ===================================================================== 
*/ void pzlaqgs(SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) { #define THRESH (0.1) /* Local variables */ NRformat_loc *Astore; doublecomplex *Aval; int_t i, j, irow, jcol, m_loc; double large, small; double temp; /* Quick return if possible */ if (A->nrow <= 0 || A->ncol <= 0) { *(unsigned char *)equed = 'N'; return; } Astore = A->Store; Aval = Astore->nzval; m_loc = Astore->m_loc; /* Initialize LARGE and SMALL. */ small = dmach_dist("Safe minimum") / dmach_dist("Precision"); large = 1. / small; if (rowcnd >= THRESH && amax >= small && amax <= large) { if (colcnd >= THRESH) *(unsigned char *)equed = 'N'; else { /* Column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; zd_mult(&Aval[j], &Aval[j], c[jcol]); } ++irow; } *(unsigned char *)equed = 'C'; } } else if (colcnd >= THRESH) { /* Row scaling, no column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) zd_mult(&Aval[j], &Aval[j], r[irow]); ++irow; } *(unsigned char *)equed = 'R'; } else { /* Both row and column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; temp = r[irow] * c[jcol]; zd_mult(&Aval[j], &Aval[j], temp); } ++irow; } *(unsigned char *)equed = 'B'; } return; } /* pzlaqgs */ SuperLU_DIST_5.3.0/SRC/symbfact.c0000644013363400111340000006771113233431301015253 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs a symbolic factorization * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999

  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
 
  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 
  Permission is hereby granted to use or copy this program for any
  purpose, provided the above notices are retained on all copies.
  Permission to modify the code and to distribute modified code is
  granted, provided the above notices are retained, and a notice that
  the code was modified is included with the above copyright notice.
 * 
*/ /* * Modified by X. S. Li. */ #include "superlu_ddefs.h" /* What type of supernodes we want */ #define T2_SUPER /* * Internal protypes */ static void relax_snode(int_t, int_t *, int_t, int_t *, int_t *); static int_t snode_dfs(SuperMatrix *, const int_t, const int_t, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *); static int_t column_dfs(SuperMatrix *, const int_t, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *); static int_t pivotL(const int_t, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *); static int_t set_usub(const int_t, const int_t, const int_t, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *); static void pruneL(const int_t, const int_t *, const int_t, const int_t, const int_t *, const int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *); /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   symbfact() performs a symbolic factorization on matrix A and sets up 
 *   the nonzero data structures which are suitable for supernodal Gaussian
 *   elimination with no pivoting (GENP). This routine features:
 *        o depth-first search (DFS)
 *        o supernodes
 *        o symmetric structure pruning
 *
 * Return value
 * ============
 *   < 0, number of bytes needed for LSUB.
 *   = 0, matrix dimension is 1.
 *   > 0, number of bytes allocated when out of memory.
 * 
*/ int_t symbfact /************************************************************************/ ( superlu_dist_options_t *options, /* input options */ int pnum, /* process number */ SuperMatrix *A, /* original matrix A permuted by columns (input) */ int_t *perm_c, /* column permutation vector (input) */ int_t *etree, /* column elimination tree (input) */ Glu_persist_t *Glu_persist, /* output */ Glu_freeable_t *Glu_freeable /* output */ ) { int_t m, n, min_mn, j, i, k, irep, nseg, pivrow, info; int_t *iwork, *perm_r, *segrep, *repfnz; int_t *xprune, *marker, *parent, *xplore; int_t relax, *desc, *relax_end; long long int nnzL, nnzU, nnzLU, nnzLSUB; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(pnum, "Enter symbfact()"); #endif m = A->nrow; n = A->ncol; min_mn = SUPERLU_MIN(m, n); /* Allocate storage common to the symbolic factor routines */ info = symbfact_SubInit(DOFACT, NULL, 0, m, n, ((NCPformat*)A->Store)->nnz, Glu_persist, Glu_freeable); iwork = (int_t *) intMalloc_dist(6*m+2*n); perm_r = iwork; segrep = iwork + m; repfnz = segrep + m; marker = repfnz + m; parent = marker + m; xplore = parent + m; xprune = xplore + m; relax_end = xprune + n; relax = sp_ienv_dist(2); ifill_dist(perm_r, m, EMPTY); ifill_dist(repfnz, m, EMPTY); ifill_dist(marker, m, EMPTY); Glu_persist->supno[0] = -1; Glu_persist->xsup[0] = 0; Glu_freeable->xlsub[0] = 0; Glu_freeable->xusub[0] = 0; /*for (j = 0; j < n; ++j) iperm_c[perm_c[j]] = j;*/ /* Identify relaxed supernodes. */ if ( !(desc = intMalloc_dist(n+1)) ) ABORT("Malloc fails for desc[]");; relax_snode(n, etree, relax, desc, relax_end); SUPERLU_FREE(desc); for (j = 0; j < min_mn; ) { if ( relax_end[j] != EMPTY ) { /* beginning of a relaxed snode */ k = relax_end[j]; /* end of the relaxed snode */ /* Determine union of the row structure of supernode (j:k). 
*/ if ( (info = snode_dfs(A, j, k, xprune, marker, Glu_persist, Glu_freeable)) != 0 ) return info; for (i = j; i <= k; ++i) pivotL(i, perm_r, &pivrow, Glu_persist, Glu_freeable); j = k+1; } else { /* Perform a symbolic factorization on column j, and detects whether column j starts a new supernode. */ if ((info = column_dfs(A, j, perm_r, &nseg, segrep, repfnz, xprune, marker, parent, xplore, Glu_persist, Glu_freeable)) != 0) return info; /* Copy the U-segments to usub[*]. */ if ((info = set_usub(min_mn, j, nseg, segrep, repfnz, Glu_persist, Glu_freeable)) != 0) return info; pivotL(j, perm_r, &pivrow, Glu_persist, Glu_freeable); /* Prune columns [0:j-1] using column j. */ pruneL(j, perm_r, pivrow, nseg, segrep, repfnz, xprune, Glu_persist, Glu_freeable); /* Reset repfnz[*] to prepare for the next column. */ for (i = 0; i < nseg; i++) { irep = segrep[i]; repfnz[irep] = EMPTY; } ++j; } /* else */ } /* for j ... */ countnz_dist(min_mn, xprune, &nnzL, &nnzU, Glu_persist, Glu_freeable); /* Apply perm_r to L; Compress LSUB array. */ nnzLSUB = fixupL_dist(min_mn, perm_r, Glu_persist, Glu_freeable); if ( !pnum && (options->PrintStat == YES)) { nnzLU = nnzL + nnzU - min_mn; printf("\tNonzeros in L %lld\n", nnzL); printf("\tNonzeros in U %lld\n", nnzU); printf("\tnonzeros in L+U %lld\n", nnzLU); printf("\tnonzeros in LSUB %lld\n", nnzLSUB); } SUPERLU_FREE(iwork); #if ( PRNTlevel>=3 ) PrintInt10("lsub", Glu_freeable->xlsub[n], Glu_freeable->lsub); PrintInt10("xlsub", n+1, Glu_freeable->xlsub); PrintInt10("xprune", n, xprune); PrintInt10("usub", Glu_freeable->xusub[n], Glu_freeable->usub); PrintInt10("xusub", n+1, Glu_freeable->xusub); PrintInt10("supno", n, Glu_persist->supno); PrintInt10("xsup", (Glu_persist->supno[n])+2, Glu_persist->xsup); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(pnum, "Exit symbfact()"); #endif /* return (-i); */ return (-nnzLSUB); } /* SYMBFACT */ /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   relax_snode() identifies the initial relaxed supernodes, assuming that 
 *   the matrix has been reordered according to a postorder of the etree.
 * 
*/
static void relax_snode
/************************************************************************/
(
 const int_t n, /* number of columns in the matrix (input) */
 int_t       *et,   /* column elimination tree (input) */
 const int_t relax, /* max no of columns allowed in a relaxed snode (input) */
 int_t       *desc, /* number of descendants of each etree node. */
 int_t       *relax_end /* last column in a supernode (output) */
 )
{
    int_t col, up;
    int_t first_col, last_col;	/* first and last column of the current snode */
    int_t snode_count = 0;	/* only reported in the debug trace below */

    ifill_dist(relax_end, n, EMPTY);
    ifill_dist(desc, n+1, 0);

    /* Accumulate descendant counts; a parent equal to n is the dummy root. */
    for (col = 0; col < n; ++col) {
	up = et[col];
	if ( up == n ) continue;
	desc[up] += desc[col] + 1;
    }

    /* Walk the columns, growing each relaxed supernode upward from a leaf. */
    col = 0;
    while ( col < n ) {
	first_col = col;
	last_col = col;

	/* Climb while the parent's subtree has fewer than 'relax' descendants. */
	for (up = et[last_col]; up != n && desc[up] < relax; up = et[last_col])
	    last_col = up;

	/* Record the last column at the supernode's first column. */
	relax_end[first_col] = last_col;
	++snode_count;

	/* Resume at the next leaf, i.e. the next column without descendants. */
	col = last_col + 1;
	while ( col < n && desc[col] != 0 ) ++col;
    }

#if ( DEBUGlevel>=1 )
    printf(".. No of relaxed snodes: " IFMT "\trelax: " IFMT "\n", snode_count, relax);
#endif
} /* RELAX_SNODE */
/************************************************************************/
/*! \brief * *
 
 * Purpose
 * =======
 *    snode_dfs() determines the union of the row structures of those 
 *    columns within the relaxed snode.
 *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
 *    the part outside the rectangular supernode must be zero.
 *
 * Return value
 * ============
 *    0   success;
 *   >0   number of bytes allocated when run out of memory.
 * 
*/
static int_t snode_dfs
/************************************************************************/
(
 SuperMatrix *A,       /* original matrix A permuted by columns (input) */
 const int_t jcol,     /* beginning of the supernode (input) */
 const int_t kcol,     /* end of the supernode (input) */
 int_t       *xprune,  /* pruned location in each adjacency list (output) */
 int_t       *marker,  /* working array of size m */
 Glu_persist_t *Glu_persist,   /* global LU data structures (modified) */
 Glu_freeable_t *Glu_freeable
 )
{
    NCPformat *Astore;
    int_t *rowind, *col_begin, *col_end;
    int_t *xsup, *supno, *lsub, *xlsub;
    int_t col, p, row, src, dst;
    int_t snode_id, next_l, next_u, lsub_cap, copy_end, mem_error;

    Astore    = A->Store;
    rowind    = Astore->rowind;
    col_begin = Astore->colbeg;
    col_end   = Astore->colend;
    xsup      = Glu_persist->xsup;
    supno     = Glu_persist->supno;
    lsub      = Glu_freeable->lsub;
    xlsub     = Glu_freeable->xlsub;
    lsub_cap  = Glu_freeable->nzlmax;

    snode_id = ++supno[jcol];	/* Next available supernode number */
    next_l = xlsub[jcol];
    next_u = Glu_freeable->xusub[jcol];

    /* Gather, without duplicates, the row indices of columns jcol..kcol. */
    for (col = jcol; col <= kcol; ++col) {
	for (p = col_begin[col]; p < col_end[col]; ++p) {
	    row = rowind[p];
	    if ( marker[row] == kcol ) continue;	/* row already recorded */

	    marker[row] = kcol;
	    lsub[next_l++] = row;
	    if ( next_l < lsub_cap ) continue;

	    /* lsub[] is full: grow it and pick up the new base pointer. */
	    mem_error = symbfact_SubXpand(A->ncol, jcol, next_l, (MemType) LSUB,
					  &lsub_cap, Glu_freeable);
	    if ( mem_error ) return (mem_error);
	    lsub = Glu_freeable->lsub;
	}
	supno[col] = snode_id;
	Glu_freeable->xusub[col+1] = next_u; /* Tidy up the pointers in usub[*]. */
    }

    /* More than one column: duplicate the subscript set for pruning. */
    if ( jcol < kcol ) {
	copy_end = next_l + (next_l - xlsub[jcol]);
	while ( copy_end > lsub_cap ) {
	    mem_error = symbfact_SubXpand(A->ncol, jcol, next_l, (MemType) LSUB,
					  &lsub_cap, Glu_freeable);
	    if ( mem_error ) return (mem_error);
	    lsub = Glu_freeable->lsub;
	}

	dst = next_l;
	for (src = xlsub[jcol]; src < next_l; ++src) {
	    lsub[dst] = lsub[src];
	    ++dst;
	}
	for (col = jcol+1; col <= kcol; ++col) xlsub[col] = next_l;
	next_l = dst;
    }

    /* Close the supernode and record where the next column starts. */
    xsup[snode_id+1] = kcol + 1;
    supno[kcol+1]    = snode_id;
    xprune[kcol]     = next_l;
    xlsub[kcol+1]    = next_l;

#if ( PRNTlevel>=3 )
    printf(".. snode_dfs(): (%8d:%8d) nextl %d\n", jcol, kcol, next_l);
#endif

    return 0;
} /* SNODE_DFS */
/************************************************************************/
/*! \brief * *
 * Purpose
 * =======
 *   column_dfs() performs a symbolic factorization on column jcol, and
 *   detects the supernode boundary. This routine uses the row indices of
 *   A[*,jcol] to start the depth-first search (DFS).
 *
 * Output
 * ======
 *   A supernode representative is the last column of a supernode.
 *   The nonzeros in U[*,j] are segments that end at supernodal
 *   representatives. The routine returns a list of such supernodal 
 *   representatives ( segrep[*] ) in topological order of the DFS that 
 *   generates them. The location of the first nonzero in each such 
 *   supernodal segment is also returned ( repfnz[*] ).
 *
 * Data structure
 * ==============
 *   (lsub, xlsub):
 *      lsub[*] contains the compressed subscripts of the supernodes;
 *      xlsub[j] points to the starting location of the j-th column in
 *               lsub[*]; 
 *	Storage: original row subscripts in A.
 *
 *      During the course of symbolic factorization, we also use
 *	(lsub, xlsub, xprune) for the purpose of symmetric pruning.
 *      For each supernode {s,s+1,...,t=s+r} with first column s and last
 *	column t, there are two subscript sets,  the last column
 *      structures (for pruning) will be removed in the end.
 *        o lsub[j], j = xlsub[s], ..., xlsub[s+1]-1
 *          is the structure of column s (i.e. structure of this supernode).
 *          It is used for the storage of numerical values.
 *	  o lsub[j], j = xlsub[t], ..., xlsub[t+1]-1
 *	    is the structure of the last column t of this supernode.
 *	    It is for the purpose of symmetric pruning. Therefore, the
 *	    structural subscripts can be rearranged without making physical
 *	    interchanges among the numerical values.
 *
 *      (1) if t > s, only the subscript sets for column s and column t
 *          are stored. Column t represents pruned adjacency structure.
 *
 *                  --------------------------------------------
 *          lsub[*]    ... |   col s    |   col t   | ...
 *                  --------------------------------------------
 *                          ^            ^           ^
 *                       xlsub[s]    xlsub[s+1]  xlsub[t+1]
 *                                       :           :
 *                                       :         xprune[t]
 *                                   xlsub[t]      
 *                                   xprune[s]    
 *
 *      (2) if t == s, i.e., a singleton supernode, the same subscript set
 *          is used for both G(L) and pruned graph:
 *
 *                  --------------------------------------
 *          lsub[*]    ... |      s     | ...
 *                  --------------------------------------
 *                          ^            ^   
 *                       xlsub[s]   xlsub[s+1]  
 *                                  xprune[s]
 *
 *       DFS will traverse the second subscript list, i.e., the part of the
 *       pruned graph.
 *
 * Local parameters
 * ================
 *   nseg: no of segments in current U[*,j]
 *   jsuper: jsuper=EMPTY if column j does not belong to the same
 *	supernode as j-1. Otherwise, jsuper=nsuper.
 *
 *   marker: A-row --> A-row/col (0/1)
 *   repfnz: SuperA-col --> PA-row
 *   parent: SuperA-col --> SuperA-col
 *   xplore: SuperA-col --> index to L-structure
 *
 * Return value
 * ============
 *     0  success;
 *   > 0  number of bytes allocated when run out of space.
 * 
*/ static int_t column_dfs /************************************************************************/ ( SuperMatrix *A, /* original matrix A permuted by columns (input) */ const int_t jcol, /* current column number (input) */ int_t *perm_r, /* row permutation vector (input) */ int_t *nseg, /* number of U-segments in column jcol (output) */ int_t *segrep, /* list of U-segment representatives (output) */ int_t *repfnz, /* list of first nonzeros in the U-segments (output) */ int_t *xprune, /* pruned location in each adjacency list (output) */ int_t *marker, /* working array of size m */ int_t *parent, /* working array of size m */ int_t *xplore, /* working array of size m */ Glu_persist_t *Glu_persist, /* global LU data structures (modified) */ Glu_freeable_t *Glu_freeable ) { NCPformat *Astore; int_t *asub, *xa_begin, *xa_end; int_t jcolp1, jcolm1, jsuper, nsuper, nextl; int_t k, krep, krow, kmark, kperm; int_t fsupc; /* first column of a supernode */ int_t myfnz; /* first nonzero column of a U-segment */ int_t chperm, chmark, chrep, kchild; int_t xdfs, maxdfs, kpar, oldrep; int_t jptr, jm1ptr; int_t ito, ifrom, istop; /* used to compress row subscripts */ int_t *xsup, *supno, *lsub, *xlsub; int_t nzlmax; static int_t first = 1, maxsuper; int_t mem_error; /* Initializations */ Astore = A->Store; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; xsup = Glu_persist->xsup; supno = Glu_persist->supno; lsub = Glu_freeable->lsub; xlsub = Glu_freeable->xlsub; nzlmax = Glu_freeable->nzlmax; jcolp1 = jcol + 1; jcolm1 = jcol - 1; jsuper = nsuper = supno[jcol]; nextl = xlsub[jcol]; if ( first ) { maxsuper = sp_ienv_dist(3); first = 0; } *nseg = 0; /* For each nonzero in A[*,jcol] perform depth-first search. */ for (k = xa_begin[jcol]; k < xa_end[jcol]; ++k) { krow = asub[k]; kmark = marker[krow]; /* krow was visited before, go to the next nonzero. */ if ( kmark == jcol ) continue; /* * For each unmarked neighber krow of jcol ... 
*/ marker[krow] = jcol; /* mark as "visited" */ kperm = perm_r[krow]; if ( kperm == EMPTY ) { /* --------------- * krow is in L * --------------- * place it in structure of L[*,jcol]. */ lsub[nextl++] = krow; /* krow is indexed into A */ if ( nextl >= nzlmax ) { if ( mem_error = symbfact_SubXpand(A->ncol, jcol, nextl, (MemType) LSUB, &nzlmax, Glu_freeable) ) return (mem_error); lsub = Glu_freeable->lsub; } if ( kmark != jcolm1 ) jsuper = EMPTY; /* Row index subset test */ } else { /* --------------- * krow is in U * --------------- * If its supernode krep has been explored, update repfnz[*]. */ krep = xsup[supno[kperm]+1] - 1; myfnz = repfnz[krep]; if ( myfnz != EMPTY ) { /* krep was visited before */ if ( kperm < myfnz ) repfnz[krep] = kperm; /* continue; */ } else { /* Otherwise perform DFS, starting at krep */ oldrep = EMPTY; parent[krep] = oldrep; repfnz[krep] = kperm; xdfs = xlsub[krep]; maxdfs = xprune[krep]; do { /* * For each unmarked kchild of krep */ while ( xdfs < maxdfs ) { kchild = lsub[xdfs++]; chmark = marker[kchild]; if ( chmark != jcol ) { /* Not reached yet */ marker[kchild] = jcol; chperm = perm_r[kchild]; /* Case kchild is in L: place it in L[*,k] */ if ( chperm == EMPTY ) { lsub[nextl++] = kchild; if ( nextl >= nzlmax ) { if ( mem_error = symbfact_SubXpand(A->ncol, jcol, nextl, (MemType) LSUB, &nzlmax, Glu_freeable) ) return (mem_error); lsub = Glu_freeable->lsub; } if ( chmark != jcolm1 ) jsuper = EMPTY; } else { /* Case kchild is in U: * chrep = its supernode-rep. If its rep * has been explored, update its repfnz[*]. 
*/ chrep = xsup[supno[chperm]+1] - 1; myfnz = repfnz[chrep]; if ( myfnz != EMPTY ) {/* Visited before */ if (chperm < myfnz) repfnz[chrep] = chperm; } else { /* Continue DFS at sup-rep of kchild */ xplore[krep] = xdfs; oldrep = krep; krep = chrep; /* Go deeper down G(L') */ parent[krep] = oldrep; repfnz[krep] = chperm; xdfs = xlsub[krep]; maxdfs = xprune[krep]; } /* else */ } /* else */ } /* if chmark != jcol */ } /* while */ /* krow has no more unexplored neighbors: * place supernode-rep krep in postorder DFS; * backtrack DFS to its parent. */ segrep[*nseg] = krep; ++(*nseg); kpar = parent[krep]; /* Pop from stack; recurse */ if ( kpar == EMPTY ) break; /* DFS done */ krep = kpar; xdfs = xplore[krep]; maxdfs = xprune[krep]; } while ( kpar != EMPTY ); /* Until empty stack */ } /* else */ } /* else: krow is in U */ } /* for each nonzero in A[*, jcol] */ /* Check to see if jcol belongs in the same supernode as jcol-1 */ if ( jcol == 0 ) { /* Do nothing for column 0 */ nsuper = supno[0] = 0; } else { fsupc = xsup[nsuper]; jptr = xlsub[jcol]; /* Not compressed yet */ jm1ptr = xlsub[jcolm1]; #ifdef T2_SUPER if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY; #endif /* Make sure the number of columns in a supernode doesn't exceed threshold. */ if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY; /* If jcol starts a new supernode, reclaim storage space in * lsub[*] from the previous supernode. Note we only store * the subscript set of the first and last columns of * a supernode. 
(first for G(L'), last for pruned graph) */ if ( jsuper ==EMPTY ) { /* Starts a new supernode */ if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */ #ifdef CHK_COMPRESS printf(" Compress lsub[] at super %d-%d\n",fsupc,jcolm1); #endif ito = xlsub[fsupc+1]; xlsub[jcolm1] = ito; istop = ito + jptr - jm1ptr; xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */ xlsub[jcol] = istop; for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) lsub[ito] = lsub[ifrom]; nextl = ito; /* = istop + length(jcol) */ } ++nsuper; supno[jcol] = nsuper; } /* if a new supernode */ } /* else: jcol > 0 */ /* Tidy up the pointers before exit */ xsup[nsuper+1] = jcolp1; supno[jcolp1] = nsuper; xprune[jcol] = nextl; /* Initialize an upper bound for pruning. */ xlsub[jcolp1] = nextl; return 0; } /* COLUMN_DFS */ /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   pivotL() interchanges row subscripts so that each diagonal block of a
 *   supernode in L has the row subscripts sorted in order of pivots.
 *   The row subscripts in the off-diagonal block are not sorted.
 * 
*/
static int_t
pivotL
/************************************************************************/
(
 const int_t jcol,     /* current column number     (input)    */
 int_t       *perm_r,  /* row permutation vector    (output)   */
 int_t       *pivrow,  /* the pivot row index       (output)   */
 Glu_persist_t *Glu_persist,   /* global LU data structures (modified) */
 Glu_freeable_t *Glu_freeable
 )
{
    /* Locate the row-subscript list of the supernode that contains jcol. */
    int_t *xlsub     = Glu_freeable->xlsub;
    int_t first_col  = (Glu_persist->xsup)[(Glu_persist->supno)[jcol]];
    int_t lead_cols  = jcol - first_col;  /* columns before jcol; >= 0 */
    int_t list_start = xlsub[first_col];
    int_t list_len   = xlsub[first_col+1] - list_start;
    int_t *rows      = Glu_freeable->lsub + list_start;
    int_t where      = EMPTY;   /* position of the diagonal subscript */
    int_t pos;

    /* The diagonal of column jcol is the subscript equal to jcol itself
       (the iperm_c[] indirection is not used). */
    for (pos = lead_cols; pos < list_len; ++pos) {
        if ( rows[pos] == jcol ) {
            where = pos;
            break;
        }
    }

    /* No diagonal entry: report the column and abort. */
    if ( where == EMPTY ) {
        printf("At column " IFMT ", ", jcol);
        ABORT("pivotL() encounters zero diagonal");
    }

    /* Record the pivot row; perm_r[] should end up as the identity. */
    *pivrow = rows[where];
    perm_r[*pivrow] = jcol;
    /*assert(*pivrow==jcol);*/

    /* Move the pivot subscript to the diagonal position of the block. */
    if ( where != lead_cols ) {
        int_t held = rows[where];
        rows[where] = rows[lead_cols];
        rows[lead_cols] = held;
    }

    return 0;
} /* PIVOTL */


/************************************************************************/
/*! \brief
 *
 *
 
 * Purpose
 * =======
 *   set_usub() sets up data structure to store supernodal segments in U.
 *   The supernodal segments in each column are stored in topological order.
 *   
 * NOTE
 * ====
 *   For each supernodal segment, we store only the index of its first
 *   nonzero, rather than the indices of the whole segment, because
 *   those indices can be generated from the first nonzero and the
 *   supernodal representative.
 *   Therefore, for G(U), we store the "skeleton" of it.
 * 
*/ static int_t set_usub /************************************************************************/ ( const int_t n, /* total number of columns (input) */ const int_t jcol, /* current column number (input) */ const int_t nseg, /* number of supernodal segments in U[*,jcol] (input) */ int_t *segrep, /* list of U-segment representatives (output) */ int_t *repfnz, /* list of first nonzeros in the U-segments (output) */ Glu_persist_t *Glu_persist, /* global LU data structures (modified) */ Glu_freeable_t *Glu_freeable ) { int_t ksub, krep, ksupno; int_t k, kfnz; int_t jsupno, nextu; int_t new_next, mem_error; int_t *supno; int_t *usub, *xusub; int_t nzumax; supno = Glu_persist->supno; usub = Glu_freeable->usub; xusub = Glu_freeable->xusub; nzumax = Glu_freeable->nzumax; jsupno = supno[jcol]; nextu = xusub[jcol]; new_next = nextu + nseg; while ( new_next > nzumax ) { if (mem_error = symbfact_SubXpand(n, jcol, nextu, (MemType) USUB, &nzumax, Glu_freeable)) return (mem_error); usub = Glu_freeable->usub; } /* We store U-segments in topological order. */ k = nseg - 1; for (ksub = 0; ksub < nseg; ++ksub) { krep = segrep[k--]; ksupno = supno[krep]; if ( ksupno != jsupno ) { /* Should go into usub[*] */ kfnz = repfnz[krep]; if ( kfnz != EMPTY ) { /* Nonzero U-segment */ usub[nextu++] = kfnz; /* fsupc = xsup[ksupno]; isub = xlsub[fsupc] + kfnz - fsupc; irow = lsub[isub]; usub[nextu++] = perm_r[irow];*/ } /* if ... */ } /* if ... */ } /* for each segment... 
*/ xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ return 0; } /* SET_USUB */ /************************************************************************/ static void pruneL /************************************************************************/ ( const int_t jcol, /* in */ const int_t *perm_r, /* in */ const int_t pivrow, /* in */ const int_t nseg, /* in */ const int_t *segrep, /* in */ const int_t *repfnz, /* in */ int_t *xprune, /* out */ Glu_persist_t *Glu_persist, /* global LU data structures (modified) */ Glu_freeable_t *Glu_freeable ) { /* * Purpose * ======= * pruneL() prunes the L-structure of supernodes whose L-structure * contains the current pivot row "pivrow". * */ int_t jsupno, irep, irep1, kmin, kmax, krow; int_t i, ktemp; int_t do_prune; /* logical variable */ int_t *supno; int_t *lsub, *xlsub; supno = Glu_persist->supno; lsub = Glu_freeable->lsub; xlsub = Glu_freeable->xlsub; /* * For each supernode-rep irep in U[*,j] */ jsupno = supno[jcol]; for (i = 0; i < nseg; i++) { irep = segrep[i]; irep1 = irep + 1; /* Do not prune with a zero U-segment */ if ( repfnz[irep] == EMPTY ) continue; /* * If irep has not been pruned & it has a nonzero in row L[pivrow,i] */ do_prune = FALSE; if ( supno[irep] != jsupno ) { if ( xprune[irep] >= xlsub[irep1] ) { kmin = xlsub[irep]; kmax = xlsub[irep1] - 1; for (krow = kmin; krow <= kmax; ++krow) if ( lsub[krow] == pivrow ) { do_prune = TRUE; break; } } if ( do_prune ) { /* Do a quicksort-type partition. */ while ( kmin <= kmax ) { if ( perm_r[lsub[kmax]] == EMPTY ) kmax--; else if ( perm_r[lsub[kmin]] != EMPTY ) kmin++; else { /* kmin below pivrow, and kmax above pivrow: * interchange the two subscripts */ ktemp = lsub[kmin]; lsub[kmin] = lsub[kmax]; lsub[kmax] = ktemp; kmin++; kmax--; } } /* while */ xprune[irep] = kmin; /* Pruning */ #if ( DEBUGlevel>=3 ) printf(".. pruneL(): use col %d: xprune[%d] = %d\n", jcol, irep, kmin); #endif } /* if do_prune */ } /* if */ } /* for each U-segment ... 
*/ } /* PRUNEL */ SuperLU_DIST_5.3.0/SRC/pzutil.c0000644013363400111340000004471113233431301014765 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Several matrix utilities * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief Gather A from the distributed compressed row format to global A in compressed column format. */ int pzCompRow_loc_to_CompCol_global ( int_t need_value, /* Input. Whether need to gather numerical values */ SuperMatrix *A, /* Input. Distributed matrix in NRformat_loc format. */ gridinfo_t *grid, /* Input */ SuperMatrix *GA /* Output */ ) { NRformat_loc *Astore; NCformat *GAstore; doublecomplex *a, *a_loc; int_t *colind, *rowptr; int_t *colptr_loc, *rowind_loc; int_t m_loc, n, i, j, k, l; int_t colnnz, fst_row, nnz_loc, nnz; doublecomplex *a_recv; /* Buffer to receive the blocks of values. */ doublecomplex *a_buf; /* Buffer to merge blocks into block columns. */ int_t *itemp; int_t *colptr_send; /* Buffer to redistribute the column pointers of the local block rows. Use n_loc+1 pointers for each block. */ int_t *colptr_blk; /* The column pointers for each block, after redistribution to the local block columns. Use n_loc+1 pointers for each block. */ int_t *rowind_recv; /* Buffer to receive the blocks of row indices. */ int_t *rowind_buf; /* Buffer to merge blocks into block columns. */ int_t *fst_rows, *n_locs; int *sendcnts, *sdispls, *recvcnts, *rdispls, *itemp_32; int it, n_loc, procs; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzCompRow_loc_to_CompCol_global"); #endif /* Initialization. */ n = A->ncol; Astore = (NRformat_loc *) A->Store; nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; fst_row = Astore->fst_row; a = Astore->nzval; rowptr = Astore->rowptr; colind = Astore->colind; n_loc = m_loc; /* NOTE: CURRENTLY ONLY WORK FOR SQUARE MATRIX */ /* ------------------------------------------------------------ FIRST PHASE: TRANSFORM A INTO DISTRIBUTED COMPRESSED COLUMN. ------------------------------------------------------------*/ zCompRow_to_CompCol_dist(m_loc, n, nnz_loc, a, colind, rowptr, &a_loc, &rowind_loc, &colptr_loc); /* Change local row index numbers to global numbers. 
*/ for (i = 0; i < nnz_loc; ++i) rowind_loc[i] += fst_row; #if ( DEBUGlevel>=2 ) printf("Proc %d\n", grid->iam); PrintInt10("rowind_loc", nnz_loc, rowind_loc); PrintInt10("colptr_loc", n+1, colptr_loc); #endif procs = grid->nprow * grid->npcol; if ( !(fst_rows = (int_t *) intMalloc_dist(2*procs)) ) ABORT("Malloc fails for fst_rows[]"); n_locs = fst_rows + procs; MPI_Allgather(&fst_row, 1, mpi_int_t, fst_rows, 1, mpi_int_t, grid->comm); for (i = 0; i < procs-1; ++i) n_locs[i] = fst_rows[i+1] - fst_rows[i]; n_locs[procs-1] = n - fst_rows[procs-1]; if ( !(recvcnts = SUPERLU_MALLOC(5*procs * sizeof(int))) ) ABORT("Malloc fails for recvcnts[]"); sendcnts = recvcnts + procs; rdispls = sendcnts + procs; sdispls = rdispls + procs; itemp_32 = sdispls + procs; /* All-to-all transfer column pointers of each block. Now the matrix view is P-by-P block-partition. */ /* n column starts for each column, and procs column ends for each block */ if ( !(colptr_send = intMalloc_dist(n + procs)) ) ABORT("Malloc fails for colptr_send[]"); if ( !(colptr_blk = intMalloc_dist( (((size_t) n_loc)+1)*procs)) ) ABORT("Malloc fails for colptr_blk[]"); for (i = 0, j = 0; i < procs; ++i) { for (k = j; k < j + n_locs[i]; ++k) colptr_send[i+k] = colptr_loc[k]; colptr_send[i+k] = colptr_loc[k]; /* Add an END marker */ sendcnts[i] = n_locs[i] + 1; #if ( DEBUGlevel>=1 ) assert(j == fst_rows[i]); #endif sdispls[i] = j + i; recvcnts[i] = n_loc + 1; rdispls[i] = i * (n_loc + 1); j += n_locs[i]; /* First column of next block in colptr_loc[] */ } MPI_Alltoallv(colptr_send, sendcnts, sdispls, mpi_int_t, colptr_blk, recvcnts, rdispls, mpi_int_t, grid->comm); /* Adjust colptr_blk[] so that they contain the local indices of the column pointers in the receive buffer. 
*/ nnz = 0; /* The running sum of the nonzeros counted by far */ k = 0; for (i = 0; i < procs; ++i) { for (j = rdispls[i]; j < rdispls[i] + n_loc; ++j) { colnnz = colptr_blk[j+1] - colptr_blk[j]; /*assert(k<=j);*/ colptr_blk[k] = nnz; nnz += colnnz; /* Start of the next column */ ++k; } colptr_blk[k++] = nnz; /* Add an END marker for each block */ } /*assert(k == (n_loc+1)*procs);*/ /* Now prepare to transfer row indices and values. */ sdispls[0] = 0; for (i = 0; i < procs-1; ++i) { sendcnts[i] = colptr_loc[fst_rows[i+1]] - colptr_loc[fst_rows[i]]; sdispls[i+1] = sdispls[i] + sendcnts[i]; } sendcnts[procs-1] = colptr_loc[n] - colptr_loc[fst_rows[procs-1]]; for (i = 0; i < procs; ++i) { j = rdispls[i]; /* Point to this block in colptr_blk[]. */ recvcnts[i] = colptr_blk[j+n_loc] - colptr_blk[j]; } rdispls[0] = 0; /* Recompute rdispls[] for row indices. */ for (i = 0; i < procs-1; ++i) rdispls[i+1] = rdispls[i] + recvcnts[i]; k = rdispls[procs-1] + recvcnts[procs-1]; /* Total received */ if ( !(rowind_recv = (int_t *) intMalloc_dist(2*k)) ) ABORT("Malloc fails for rowind_recv[]"); rowind_buf = rowind_recv + k; MPI_Alltoallv(rowind_loc, sendcnts, sdispls, mpi_int_t, rowind_recv, recvcnts, rdispls, mpi_int_t, grid->comm); if ( need_value ) { if ( !(a_recv = (doublecomplex *) doublecomplexMalloc_dist(2*k)) ) ABORT("Malloc fails for rowind_recv[]"); a_buf = a_recv + k; MPI_Alltoallv(a_loc, sendcnts, sdispls, SuperLU_MPI_DOUBLE_COMPLEX, a_recv, recvcnts, rdispls, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); } /* Reset colptr_loc[] to point to the n_loc global columns. 
*/ colptr_loc[0] = 0; itemp = colptr_send; for (j = 0; j < n_loc; ++j) { colnnz = 0; for (i = 0; i < procs; ++i) { k = i * (n_loc + 1) + j; /* j-th column in i-th block */ colnnz += colptr_blk[k+1] - colptr_blk[k]; } colptr_loc[j+1] = colptr_loc[j] + colnnz; itemp[j] = colptr_loc[j]; /* Save a copy of the column starts */ } itemp[n_loc] = colptr_loc[n_loc]; /* Merge blocks of row indices into columns of row indices. */ for (i = 0; i < procs; ++i) { k = i * (n_loc + 1); for (j = 0; j < n_loc; ++j) { /* i-th block */ for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) { rowind_buf[itemp[j]] = rowind_recv[l]; ++itemp[j]; } } } if ( need_value ) { for (j = 0; j < n_loc+1; ++j) itemp[j] = colptr_loc[j]; for (i = 0; i < procs; ++i) { k = i * (n_loc + 1); for (j = 0; j < n_loc; ++j) { /* i-th block */ for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) { a_buf[itemp[j]] = a_recv[l]; ++itemp[j]; } } } } /* ------------------------------------------------------------ SECOND PHASE: GATHER TO GLOBAL A IN COMPRESSED COLUMN FORMAT. ------------------------------------------------------------*/ GA->nrow = A->nrow; GA->ncol = A->ncol; GA->Stype = SLU_NC; GA->Dtype = A->Dtype; GA->Mtype = A->Mtype; GAstore = GA->Store = (NCformat *) SUPERLU_MALLOC ( sizeof(NCformat) ); if ( !GAstore ) ABORT ("SUPERLU_MALLOC fails for GAstore"); /* First gather the size of each piece. */ nnz_loc = colptr_loc[n_loc]; MPI_Allgather(&nnz_loc, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); for (i = 0, nnz = 0; i < procs; ++i) nnz += itemp[i]; GAstore->nnz = nnz; if ( !(GAstore->rowind = (int_t *) intMalloc_dist (nnz)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->rowind[]"); if ( !(GAstore->colptr = (int_t *) intMalloc_dist (n+1)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->colptr[]"); /* Allgatherv for row indices. 
*/ rdispls[0] = 0; for (i = 0; i < procs-1; ++i) { rdispls[i+1] = rdispls[i] + itemp[i]; itemp_32[i] = itemp[i]; } itemp_32[procs-1] = itemp[procs-1]; it = nnz_loc; MPI_Allgatherv(rowind_buf, it, mpi_int_t, GAstore->rowind, itemp_32, rdispls, mpi_int_t, grid->comm); if ( need_value ) { if ( !(GAstore->nzval = (doublecomplex *) doublecomplexMalloc_dist (nnz)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->rnzval[]"); MPI_Allgatherv(a_buf, it, SuperLU_MPI_DOUBLE_COMPLEX, GAstore->nzval, itemp_32, rdispls, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); } else GAstore->nzval = NULL; /* Now gather the column pointers. */ rdispls[0] = 0; for (i = 0; i < procs-1; ++i) { rdispls[i+1] = rdispls[i] + n_locs[i]; itemp_32[i] = n_locs[i]; } itemp_32[procs-1] = n_locs[procs-1]; MPI_Allgatherv(colptr_loc, n_loc, mpi_int_t, GAstore->colptr, itemp_32, rdispls, mpi_int_t, grid->comm); /* Recompute column pointers. */ for (i = 1; i < procs; ++i) { k = rdispls[i]; for (j = 0; j < n_locs[i]; ++j) GAstore->colptr[k++] += itemp[i-1]; itemp[i] += itemp[i-1]; /* prefix sum */ } GAstore->colptr[n] = nnz; #if ( DEBUGlevel>=2 ) if ( !grid->iam ) { printf("After pdCompRow_loc_to_CompCol_global()\n"); zPrint_CompCol_Matrix_dist(GA); } #endif SUPERLU_FREE(a_loc); SUPERLU_FREE(rowind_loc); SUPERLU_FREE(colptr_loc); SUPERLU_FREE(fst_rows); SUPERLU_FREE(recvcnts); SUPERLU_FREE(colptr_send); SUPERLU_FREE(colptr_blk); SUPERLU_FREE(rowind_recv); if ( need_value) SUPERLU_FREE(a_recv); #if ( DEBUGlevel>=1 ) if ( !grid->iam ) printf("sizeof(NCformat) %lu\n", sizeof(NCformat)); CHECK_MALLOC(grid->iam, "Exit pzCompRow_loc_to_CompCol_global"); #endif return 0; } /* pzCompRow_loc_to_CompCol_global */ /*! \brief Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-th row of B. 
*/
/*
 * What the code below does:
 *   - for each global row i in [fst_row, fst_row + m_loc), the destination
 *     row is perm[i] and the destination process is row_to_proc[perm[i]];
 *   - the destination row indices and the row's values are packed into
 *     send buffers and exchanged with MPI_Alltoallv;
 *   - each received row index, minus fst_row, selects the row of B that
 *     receives the accompanying values.
 * X and B are indexed as X[row + k*ldx] and B[row + k*ldb].
 * Allocation failures go through ABORT(). Always returns 0.
 *
 * RHS_ITERATE(k) is a macro defined elsewhere; presumably it loops k over
 * the nrhs right-hand sides -- TODO confirm.
 */
int pzPermute_Dense_Matrix
(
 int_t fst_row,
 int_t m_loc,
 int_t row_to_proc[],
 int_t perm[],
 doublecomplex X[], int ldx,
 doublecomplex B[], int ldb,
 int nrhs,
 gridinfo_t *grid
)
{
    int_t i, j, k, l;
    int p, procs;
    int *sendcnts, *sendcnts_nrhs, *recvcnts, *recvcnts_nrhs;
    int *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs;
    int *ptr_to_ibuf, *ptr_to_dbuf;
    int_t *send_ibuf, *recv_ibuf;
    doublecomplex *send_dbuf, *recv_dbuf;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pzPermute_Dense_Matrix()");
#endif

    procs = grid->nprow * grid->npcol;
    /* One allocation carved into ten int arrays of length procs. */
    if ( !(sendcnts = SUPERLU_MALLOC(10*procs * sizeof(int))) )
        ABORT("Malloc fails for sendcnts[].");
    sendcnts_nrhs = sendcnts + procs;
    recvcnts = sendcnts_nrhs + procs;
    recvcnts_nrhs = recvcnts + procs;
    sdispls = recvcnts_nrhs + procs;
    sdispls_nrhs = sdispls + procs;
    rdispls = sdispls_nrhs + procs;
    rdispls_nrhs = rdispls + procs;
    ptr_to_ibuf = rdispls_nrhs + procs;
    ptr_to_dbuf = ptr_to_ibuf + procs;

    for (i = 0; i < procs; ++i) sendcnts[i] = 0;

    /* Count the number of X entries to be sent to each process.*/
    for (i = fst_row; i < fst_row + m_loc; ++i) {
        p = row_to_proc[perm[i]];
        ++sendcnts[p];
    }
    MPI_Alltoall(sendcnts, 1, MPI_INT, recvcnts, 1, MPI_INT, grid->comm);
    /* Displacements are running sums of the counts; the *_nrhs arrays are
       the same quantities scaled by nrhs for the value buffers. */
    sdispls[0] = rdispls[0] = 0;
    sdispls_nrhs[0] = rdispls_nrhs[0] = 0;
    sendcnts_nrhs[0] = sendcnts[0] * nrhs;
    recvcnts_nrhs[0] = recvcnts[0] * nrhs;
    for (i = 1; i < procs; ++i) {
        sdispls[i] = sdispls[i-1] + sendcnts[i-1];
        sdispls_nrhs[i] = sdispls[i] * nrhs;
        rdispls[i] = rdispls[i-1] + recvcnts[i-1];
        rdispls_nrhs[i] = rdispls[i] * nrhs;
        sendcnts_nrhs[i] = sendcnts[i] * nrhs;
        recvcnts_nrhs[i] = recvcnts[i] * nrhs;
    }
    k = sdispls[procs-1] + sendcnts[procs-1];/* Total number of sends */
    l = rdispls[procs-1] + recvcnts[procs-1];/* Total number of recvs */
    /*assert(k == m_loc);*/
    /*assert(l == m_loc);*/
    /* Send and receive buffers share one allocation each: the first k
       entries (k*nrhs for values) are the send part. */
    if ( !(send_ibuf = intMalloc_dist(k + l)) )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + k;
    if ( !(send_dbuf = doublecomplexMalloc_dist((k + l)*nrhs)) )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + k * nrhs;

    for (i = 0; i < procs; ++i) {
        ptr_to_ibuf[i] = sdispls[i];
        ptr_to_dbuf[i] = sdispls_nrhs[i];
    }

    /* Fill in the send buffers: send_ibuf[] and send_dbuf[]. */
    for (i = fst_row; i < fst_row + m_loc; ++i) {
        j = perm[i];
        p = row_to_proc[j];
        send_ibuf[ptr_to_ibuf[p]] = j;
        j = ptr_to_dbuf[p]; /* j is reused as the write position in send_dbuf[] */
        RHS_ITERATE(k) { /* RHS stored in row major in the buffer */
            send_dbuf[j++] = X[i-fst_row + k*ldx];
        }
        ++ptr_to_ibuf[p];
        ptr_to_dbuf[p] += nrhs;
    }

    /* Transfer the (permuted) row indices and numerical values. */
    MPI_Alltoallv(send_ibuf, sendcnts, sdispls, mpi_int_t,
                  recv_ibuf, recvcnts, rdispls, mpi_int_t, grid->comm);
    MPI_Alltoallv(send_dbuf, sendcnts_nrhs, sdispls_nrhs,
                  SuperLU_MPI_DOUBLE_COMPLEX,
                  recv_dbuf, recvcnts_nrhs, rdispls_nrhs,
                  SuperLU_MPI_DOUBLE_COMPLEX,
                  grid->comm);

    /* Copy the buffer into b. */
    /* NOTE(review): this loop reads m_loc received rows; the matching
       assert(l == m_loc) above is commented out. */
    for (i = 0, l = 0; i < m_loc; ++i) {
        j = recv_ibuf[i] - fst_row; /* Relative row number */
        RHS_ITERATE(k) { /* RHS stored in row major in the buffer */
            B[j + k*ldb] = recv_dbuf[l++];
        }
    }

    SUPERLU_FREE(sendcnts);
    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pzPermute_Dense_Matrix()");
#endif
    return 0;
} /* pzPermute_Dense_Matrix */


/*! \brief Initialize the data structure for the solution phase.
*/ int zSolveInit(superlu_dist_options_t *options, SuperMatrix *A, int_t perm_r[], int_t perm_c[], int_t nrhs, LUstruct_t *LUstruct, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct) { int_t *row_to_proc, *inv_perm_c, *itemp; NRformat_loc *Astore; int_t i, fst_row, m_loc, p; int procs; Astore = (NRformat_loc *) A->Store; fst_row = Astore->fst_row; m_loc = Astore->m_loc; procs = grid->nprow * grid->npcol; if ( !(row_to_proc = intMalloc_dist(A->nrow)) ) ABORT("Malloc fails for row_to_proc[]"); SOLVEstruct->row_to_proc = row_to_proc; if ( !(inv_perm_c = intMalloc_dist(A->ncol)) ) ABORT("Malloc fails for inv_perm_c[]."); for (i = 0; i < A->ncol; ++i) inv_perm_c[perm_c[i]] = i; SOLVEstruct->inv_perm_c = inv_perm_c; /* ------------------------------------------------------------ EVERY PROCESS NEEDS TO KNOW GLOBAL PARTITION. SET UP THE MAPPING BETWEEN ROWS AND PROCESSES. NOTE: For those processes that do not own any row, it must must be set so that fst_row == A->nrow. ------------------------------------------------------------*/ if ( !(itemp = intMalloc_dist(procs+1)) ) ABORT("Malloc fails for itemp[]"); MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); itemp[procs] = A->nrow; for (p = 0; p < procs; ++p) { for (i = itemp[p] ; i < itemp[p+1]; ++i) row_to_proc[i] = p; } #if ( DEBUGlevel>=2 ) if ( !grid->iam ) { printf("fst_row = %d\n", fst_row); PrintInt10("row_to_proc", A->nrow, row_to_proc); PrintInt10("inv_perm_c", A->ncol, inv_perm_c); } #endif SUPERLU_FREE(itemp); #if 0 /* Compute the mapping between rows and processes. */ /* XSL NOTE: What happens if # of mapped processes is smaller than total Procs? For the processes without any row, let fst_row be EMPTY (-1). Make sure this case works! 
*/ MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); itemp[procs] = n; for (p = 0; p < procs; ++p) { j = itemp[p]; if ( j != EMPTY ) { k = itemp[p+1]; if ( k == EMPTY ) k = n; for (i = j ; i < k; ++i) row_to_proc[i] = p; } } #endif get_diag_procs(A->ncol, LUstruct->Glu_persist, grid, &SOLVEstruct->num_diag_procs, &SOLVEstruct->diag_procs, &SOLVEstruct->diag_len); /* Setup communication pattern for redistribution of B and X. */ if ( !(SOLVEstruct->gstrs_comm = (pxgstrs_comm_t *) SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) ) ABORT("Malloc fails for gstrs_comm[]"); pxgstrs_init(A->ncol, m_loc, nrhs, fst_row, perm_r, perm_c, grid, LUstruct->Glu_persist, SOLVEstruct); if ( !(SOLVEstruct->gsmv_comm = (pzgsmv_comm_t *) SUPERLU_MALLOC(sizeof(pzgsmv_comm_t))) ) ABORT("Malloc fails for gsmv_comm[]"); SOLVEstruct->A_colind_gsmv = NULL; options->SolveInitialized = YES; return 0; } /* zSolveInit */ /*! \brief Release the resources used for the solution phase. */ void zSolveFinalize(superlu_dist_options_t *options, SOLVEstruct_t *SOLVEstruct) { int_t *it; pxgstrs_finalize(SOLVEstruct->gstrs_comm); if ( options->RefineInitialized ) { pzgsmv_finalize(SOLVEstruct->gsmv_comm); options->RefineInitialized = NO; } SUPERLU_FREE(SOLVEstruct->gsmv_comm); SUPERLU_FREE(SOLVEstruct->row_to_proc); SUPERLU_FREE(SOLVEstruct->inv_perm_c); SUPERLU_FREE(SOLVEstruct->diag_procs); SUPERLU_FREE(SOLVEstruct->diag_len); if ( it = SOLVEstruct->A_colind_gsmv ) SUPERLU_FREE(it); options->SolveInitialized = NO; } /* zSolveFinalize */ /*! 
\brief Check the inf-norm of the error vector */ void pzinf_norm_error(int iam, int_t n, int_t nrhs, doublecomplex x[], int_t ldx, doublecomplex xtrue[], int_t ldxtrue, gridinfo_t *grid) { double err, xnorm, temperr, tempxnorm; doublecomplex *x_work, *xtrue_work; doublecomplex temp; int i, j; for (j = 0; j < nrhs; j++) { x_work = &x[j*ldx]; xtrue_work = &xtrue[j*ldxtrue]; err = xnorm = 0.0; for (i = 0; i < n; i++) { z_sub(&temp, &x_work[i], &xtrue_work[i]); err = SUPERLU_MAX(err, slud_z_abs(&temp)); xnorm = SUPERLU_MAX(xnorm, slud_z_abs(&x_work[i])); } /* get the golbal max err & xnrom */ temperr = err; tempxnorm = xnorm; MPI_Allreduce( &temperr, &err, 1, MPI_DOUBLE, MPI_MAX, grid->comm); MPI_Allreduce( &tempxnorm, &xnorm, 1, MPI_DOUBLE, MPI_MAX, grid->comm); err = err / xnorm; if ( !iam ) printf("\tSol %2d: ||X-Xtrue||/||X|| = %e\n", j, err); } } SuperLU_DIST_5.3.0/SRC/zmemory_dist.c0000644013363400111340000001070713233431301016161 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Memory utilities * *
 * -- Distributed SuperLU routine (version 4.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 * 
*/ #include "superlu_zdefs.h" /* Variables external to this file */ extern LU_stack_t stack; void *zuser_malloc_dist(int_t bytes, int_t which_end) { void *buf; if ( StackFull(bytes) ) return (NULL); if ( which_end == HEAD ) { buf = (char*) stack.array + stack.top1; stack.top1 += bytes; } else { stack.top2 -= bytes; buf = (char*) stack.array + stack.top2; } stack.used += bytes; return buf; } void zuser_free_dist(int_t bytes, int_t which_end) { if ( which_end == HEAD ) { stack.top1 -= bytes; } else { stack.top2 += bytes; } stack.used -= bytes; } /*! \brief * *
 * mem_usage consists of the following fields:
 *    - for_lu (float)
 *      The amount of space used in bytes for the L\U data structures.
 *    - total (float)
 *      The amount of space needed in bytes to perform factorization.
 *    - expansions (int)
 *      Number of memory expansions during the LU factorization.
 * 
*/ int_t zQuerySpace_dist(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, superlu_dist_mem_usage_t *mem_usage) { register int_t dword, gb, iword, k, nb, nsupers; int_t *index, *xsup; int iam, mycol, myrow; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; iam = grid->iam; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); iword = sizeof(int_t); dword = sizeof(doublecomplex); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; mem_usage->for_lu = 0.; /* For L factor */ nb = CEILING( nsupers, grid->npcol ); /* Number of local column blocks */ for (k = 0; k < nb; ++k) { gb = k * grid->npcol + mycol; /* Global block number. */ if ( gb < nsupers ) { index = Llu->Lrowind_bc_ptr[k]; if ( index ) { mem_usage->for_lu += (float) ((BC_HEADER + index[0]*LB_DESCRIPTOR + index[1]) * iword); mem_usage->for_lu += (float)(index[1]*SuperSize( gb )*dword); } } } /* For U factor */ nb = CEILING( nsupers, grid->nprow ); /* Number of local row blocks */ for (k = 0; k < nb; ++k) { gb = k * grid->nprow + myrow; /* Global block number. */ if ( gb < nsupers ) { index = Llu->Ufstnz_br_ptr[k]; if ( index ) { mem_usage->for_lu += (float)(index[2] * iword); mem_usage->for_lu += (float)(index[1] * dword); } } } /* Working storage to support factorization */ mem_usage->total = mem_usage->for_lu; #if 0 mem_usage->total += (float)(( Llu->bufmax[0] + Llu->bufmax[2] ) * iword + ( Llu->bufmax[1] + Llu->bufmax[3] + maxsup ) * dword ); /**** another buffer to use mpi_irecv in pdgstrf_irecv.c ****/ mem_usage->total += (float)( Llu->bufmax[0] * iword + Llu->bufmax[1] * dword ); mem_usage->total += (float)( maxsup * maxsup + maxsup) * iword; k = CEILING( nsupers, grid->nprow ); mem_usage->total += (float)(2 * k * iword); #else /*mem_usage->total += stat->current_buffer;*/ mem_usage->total += stat->peak_buffer; #if ( PRNTlevel>=1 ) if (iam==0) printf(".. 
zQuerySpace: peak_buffer %.2f (MB)\n", stat->peak_buffer * 1.0e-6); #endif #endif return 0; } /* zQuerySpace_dist */ /* * Allocate storage for original matrix A */ void zallocateA_dist(int_t n, int_t nnz, doublecomplex **a, int_t **asub, int_t **xa) { *a = (doublecomplex *) doublecomplexMalloc_dist(nnz); *asub = (int_t *) intMalloc_dist(nnz); *xa = (int_t *) intMalloc_dist(n+1); } doublecomplex *doublecomplexMalloc_dist(int_t n) { doublecomplex *buf; buf = (doublecomplex *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(doublecomplex) ); return (buf); } doublecomplex *doublecomplexCalloc_dist(int_t n) { doublecomplex *buf; register int_t i; doublecomplex zero = {0.0, 0.0}; buf = (doublecomplex *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(doublecomplex)); if ( !buf ) return (buf); for (i = 0; i < n; ++i) buf[i] = zero; return (buf); } SuperLU_DIST_5.3.0/SRC/Makefile0000644013363400111340000000730713233431301014732 0ustar xiaoyessg####################################################################### # # This makefile creates a library for distributed SuperLU. # The files are organized as follows: # # ALLAUX -- Auxiliary routines called from all precisions # DSLUSRC -- Double precision real serial SuperLU routines # DPLUSRC -- Double precision real parallel SuperLU routines # ZSLUSRC -- Double precision complex serial SuperLU routines # ZPLUSRC -- Double precision complex parallel SuperLU routines # # The library can be set up to include routines for any combination # of the two precisions. To create or add to the library, enter make # followed by one or more of the precisions desired. Some examples: # make double # make double complex16 # Alternatively, the command # make # without any arguments creates a library of all two precisions. # The library is called # superlu.a # and is created at the next higher directory level. 
# # To remove the object files after the library is created, enter # make clean # ####################################################################### include ../make.inc # # Precision independent routines # ALLAUX = sp_ienv.o etree.o sp_colorder.o get_perm_c.o \ colamd.o mmd.o comm.o memory.o util.o superlu_grid.o \ pxerr_dist.o superlu_timer.o symbfact.o \ psymbfact.o psymbfact_util.o get_perm_c_parmetis.o mc64ad_dist.o \ static_schedule.o xerr_dist.o smach_dist.o dmach_dist.o \ superlu_dist_version.o ifeq "${ACC}" "GPU" ALLAUX += cublas_utils.o endif # # Routines literally taken from SuperLU, but renamed with suffix _dist # DSLUSRC = dlangs_dist.o dgsequ_dist.o dlaqgs_dist.o dutil_dist.o \ dmemory_dist.o dmyblas2_dist.o dsp_blas2_dist.o dsp_blas3_dist.o ZSLUSRC = dcomplex_dist.o zlangs_dist.o zgsequ_dist.o zlaqgs_dist.o \ zutil_dist.o zmemory_dist.o zmyblas2_dist.o \ zsp_blas2_dist.o zsp_blas3_dist.o # # Routines for double precision parallel SuperLU DPLUSRC = pdgssvx.o pdgssvx_ABglobal.o \ dreadhb.o dreadrb.o dreadtriple.o dreadMM.o \ pdgsequ.o pdlaqgs.o dldperm_dist.o pdlangs.o pdutil.o \ pdsymbfact_distdata.o ddistribute.o pddistribute.o \ pdgstrf.o pdgstrf2.o pdGetDiagU.o \ pdgstrs.o pdgstrs1.o pdgstrs_lsum.o pdgstrs_Bglobal.o \ pdgsrfs.o pdgsmv.o pdgsrfs_ABXglobal.o pdgsmv_AXglobal.o # # Routines for double complex parallel SuperLU ZPLUSRC = pzgssvx.o pzgssvx_ABglobal.o \ zreadhb.o zreadrb.o zreadtriple.o zreadMM.o \ pzgsequ.o pzlaqgs.o zldperm_dist.o pzlangs.o pzutil.o \ pzsymbfact_distdata.o zdistribute.o pzdistribute.o \ pzgstrf.o pzgstrf2.o pzGetDiagU.o \ pzgstrs.o pzgstrs1.o pzgstrs_lsum.o pzgstrs_Bglobal.o \ pzgsrfs.o pzgsmv.o pzgsrfs_ABXglobal.o pzgsmv_AXglobal.o all: double complex16 config_h: ifeq ($(XSDK_INDEX_SIZE),64) printf "#define XSDK_INDEX_SIZE 64\n" > superlu_dist_config.h else printf "/* #define XSDK_INDEX_SIZE 64 */\n" > superlu_dist_config.h endif printf "#if (XSDK_INDEX_SIZE == 64)\n#define _LONGINT 1\n#endif\n" >> 
superlu_dist_config.h double: config_h $(DSLUSRC) $(DPLUSRC) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(DSUPERLULIB) \ $(DSLUSRC) $(DPLUSRC) $(ALLAUX) $(RANLIB) $(DSUPERLULIB) complex16: config_h $(ZSLUSRC) $(ZPLUSRC) $(ALLAUX) $(ARCH) $(ARCHFLAGS) $(DSUPERLULIB) \ $(ZSLUSRC) $(ZPLUSRC) $(ALLAUX) $(RANLIB) $(DSUPERLULIB) pdgstrf.o: dscatter.c dlook_ahead_update.c dSchCompUdt-2Ddynamic.c pdgstrf.c $(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) -c pdgstrf.c $(VERBOSE) pzgstrf.o: zscatter.c zlook_ahead_update.c zSchCompUdt-2Ddynamic.c pzgstrf.c $(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) -c pzgstrf.c $(VERBOSE) .c.o: $(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) -c $< $(VERBOSE) .f.o: $(FORTRAN) $(FFLAGS) -c $< $(VERBOSE) clean: rm -f *.o $(DSUPERLULIB) SuperLU_DIST_5.3.0/SRC/superlu_dist_config.h0000644013363400111340000000030513234133023017502 0ustar xiaoyessg/* superlu_dist_config.h.in */ /* Enable parmetis */ #define HAVE_PARMETIS TRUE /* enable 64bit index mode */ /* #undef XSDK_INDEX_SIZE */ #if (XSDK_INDEX_SIZE == 64) #define _LONGINT 1 #endif SuperLU_DIST_5.3.0/SRC/pdGetDiagU.c0000644013363400111340000000670113233431301015410 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file p@(pre)GetDiagU.c * \brief Extracts the main diagonal of matrix U * *
 * -- Auxiliary routine in distributed SuperLU (version 5.1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * Xiaoye S. Li
 * Created:  April 16, 2002
 * Modified: May 15, 2016
 * 
*/ #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * GetDiagU extracts the main diagonal of matrix U of the LU factorization.
 *  
 * Arguments
 * =========
 *
 * n        (input) int_t
 *          Dimension of the matrix.
 *
 * LUstruct (input) LUstruct_t*
 *          The data structures to store the distributed L and U factors.
 *          see superlu_ddefs.h for its definition.
 *
 * grid     (input) gridinfo_t*
 *          The 2D process mesh. It contains the MPI communicator, the number
 *          of process rows (NPROW), the number of process columns (NPCOL),
 *          and my process rank. It is an input argument to all the
 *          parallel routines.
 *
 * diagU    (output) double*, dimension (n)
 *          The main diagonal of matrix U.
 *          On exit, it is available on all processes.
 *
 *
 * Note
 * ====
 *
 * The diagonal blocks of the L and U matrices are stored in the L
 * data structures, and are on the diagonal processes of the
 * 2D process grid.
 *
 * This routine is modified from gather_diag_to_all() in pdgstrs_Bglobal.c.
 * 
*/ void pdGetDiagU(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, double *diagU) { int_t *xsup; int iam, knsupc, pkk; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int_t i, j, jj, k, lk, lwork, nsupers, p; int_t num_diag_procs, *diag_procs, *diag_len; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; double *dblock, *dwork, *lusup; iam = grid->iam; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX( jj, diag_len[j] ); if ( !(dwork = doubleMalloc_dist(jj)) ) ABORT("Malloc fails for dwork[]"); for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { /* Copy diagonal into buffer dwork[]. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBj( k, grid ); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; for (i = 0; i < knsupc; ++i) /* Copy the diagonal. */ dwork[lwork+i] = lusup[i*(nsupr+1)]; lwork += knsupc; } MPI_Bcast( dwork, lwork, MPI_DOUBLE, pkk, grid->comm ); } else { MPI_Bcast( dwork, diag_len[p], MPI_DOUBLE, pkk, grid->comm ); } /* Scatter dwork[] into global diagU vector. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); dblock = &diagU[FstBlockC( k )]; for (i = 0; i < knsupc; ++i) dblock[i] = dwork[lwork+i]; lwork += knsupc; } } /* for p = ... 
*/ SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); SUPERLU_FREE(dwork); } SuperLU_DIST_5.3.0/SRC/memory.patch0000644013363400111340000000045113233431301015614 0ustar xiaoyessg118d117 < 144c143 < buf = (int_t *) SUPERLU_MALLOC(n * sizeof(int_t)); --- > buf = (int_t *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(int_t)); 152c151 < buf = (int_t *) SUPERLU_MALLOC(n * sizeof(int_t)); --- > buf = (int_t *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(int_t)); SuperLU_DIST_5.3.0/SRC/dreadtriple.c0000644013363400111340000001023413233431301015726 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * */ #include #include "superlu_ddefs.h" #undef EXPAND_SYM /*! brief * *
 * Output parameters
 * =================
 *   (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of
 *      nonzeros in columns of matrix A; (*nzval)[*] the numerical values;
 *      column i of A is given by (*nzval)[k], k = (*colptr)[i],...,
 *      (*colptr)[i+1]-1.
 * 
*/ void dreadtriple_dist(FILE *fp, int_t *m, int_t *n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { int_t j, k, jsize, nnz, nz, new_nonz; double *a, *val; int_t *asub, *xa, *row, *col; int_t zero_base = 0; /* File format: * First line: #rows #non-zero * Triplet in the rest of lines: * row col value */ #ifdef _LONGINT fscanf(fp, "%ld%ld%ld", m, n, nonz); #else fscanf(fp, "%d%d%d", m, n, nonz); #endif #ifdef EXPAND_SYM new_nonz = 2 * *nonz - *n; #else new_nonz = *nonz; #endif *m = *n; printf("m %lld, n %lld, nonz %lld\n", (long long) *m, (long long) *n, (long long) *nonz); dallocateA_dist(*n, new_nonz, nzval, rowind, colptr); /* Allocate storage */ a = *nzval; asub = *rowind; xa = *colptr; if ( !(val = (double *) SUPERLU_MALLOC(new_nonz * sizeof(double))) ) ABORT("Malloc fails for val[]"); if ( !(row = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for row[]"); if ( !(col = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for col[]"); for (j = 0; j < *n; ++j) xa[j] = 0; /* Read into the triplet array from a file */ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { #ifdef _LONGINT fscanf(fp, "%ld%ld%lf\n", &row[nz], &col[nz], &val[nz]); #else fscanf(fp, "%d%d%lf\n", &row[nz], &col[nz], &val[nz]); #endif if ( nnz == 0 ) /* first nonzero */ if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else printf("triplet file: row/col indices are one-based.\n"); if ( !zero_base ) { /* Change to 0-based indexing. 
*/ --row[nz]; --col[nz]; } if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n /*|| val[nz] == 0.*/) { fprintf(stderr, "nz " IFMT ", (" IFMT ", " IFMT ") = %e out of bound, removed\n", nz, row[nz], col[nz], val[nz]); exit(-1); } else { ++xa[col[nz]]; #ifdef EXPAND_SYM if ( row[nz] != col[nz] ) { /* Excluding diagonal */ ++nz; row[nz] = col[nz-1]; col[nz] = row[nz-1]; val[nz] = val[nz-1]; ++xa[col[nz]]; } #endif ++nz; } } *nonz = nz; #ifdef EXPAND_SYM printf("new_nonz after symmetric expansion:\t%d\n", *nonz); #endif /* Initialize the array of column pointers */ k = 0; jsize = xa[0]; xa[0] = 0; for (j = 1; j < *n; ++j) { k += jsize; jsize = xa[j]; xa[j] = k; } /* Copy the triplets into the column oriented storage */ for (nz = 0; nz < *nonz; ++nz) { j = col[nz]; k = xa[j]; asub[k] = row[nz]; a[k] = val[nz]; ++xa[j]; } /* Reset the column pointers to the beginning of each column */ for (j = *n; j > 0; --j) xa[j] = xa[j-1]; xa[0] = 0; SUPERLU_FREE(val); SUPERLU_FREE(row); SUPERLU_FREE(col); #ifdef CHK_INPUT int i; for (i = 0; i < *n; i++) { printf("Col %d, xa %d\n", i, xa[i]); for (k = xa[i]; k < xa[i+1]; k++) printf("%d\t%16.10f\n", asub[k], a[k]); } #endif } void dreadrhs(int m, double *b) { FILE *fp, *fopen(); int i; if ( !(fp = fopen("b.dat", "r")) ) { fprintf(stderr, "dreadrhs: file does not exist\n"); exit(-1); } for (i = 0; i < m; ++i) fscanf(fp, "%lf\n", &b[i]); /*fscanf(fp, "%d%lf\n", &j, &b[i]);*/ /* readpair_(j, &b[i]);*/ fclose(fp); } SuperLU_DIST_5.3.0/SRC/mmd.c0000644013363400111340000007164513233431301014221 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief Implements the minimum degree algorithm */ #include "superlu_defs.h" /* *************************************************************** */ /* *************************************************************** */ /* **** GENMMD ..... MULTIPLE MINIMUM EXTERNAL DEGREE **** */ /* *************************************************************** */ /* *************************************************************** */ /* AUTHOR - JOSEPH W.H. LIU */ /* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */ /* PURPOSE - THIS ROUTINE IMPLEMENTS THE MINIMUM DEGREE */ /* ALGORITHM. IT MAKES USE OF THE IMPLICIT REPRESENTATION */ /* OF ELIMINATION GRAPHS BY QUOTIENT GRAPHS, AND THE */ /* NOTION OF INDISTINGUISHABLE NODES. IT ALSO IMPLEMENTS */ /* THE MODIFICATIONS BY MULTIPLE ELIMINATION AND MINIMUM */ /* EXTERNAL DEGREE. */ /* --------------------------------------------- */ /* CAUTION - THE ADJACENCY VECTOR ADJNCY WILL BE */ /* DESTROYED. */ /* --------------------------------------------- */ /* INPUT PARAMETERS - */ /* NEQNS - NUMBER OF EQUATIONS. */ /* (XADJ,ADJNCY) - THE ADJACENCY STRUCTURE. */ /* DELTA - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */ /* MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) INTEGER */ /* (ANY SMALLER ESTIMATE WILL DO) FOR MARKING */ /* NODES. */ /* OUTPUT PARAMETERS - */ /* PERM - THE MINIMUM DEGREE ORDERING. */ /* INVP - THE INVERSE OF PERM. */ /* NOFSUB - AN UPPER BOUND ON THE NUMBER OF NONZERO */ /* SUBSCRIPTS FOR THE COMPRESSED STORAGE SCHEME. */ /* WORKING PARAMETERS - */ /* DHEAD - VECTOR FOR HEAD OF DEGREE LISTS. */ /* INVP - USED TEMPORARILY FOR DEGREE FORWARD LINK. */ /* PERM - USED TEMPORARILY FOR DEGREE BACKWARD LINK. */ /* QSIZE - VECTOR FOR SIZE OF SUPERNODES. */ /* LLIST - VECTOR FOR TEMPORARY LINKED LISTS. */ /* MARKER - A TEMPORARY MARKER VECTOR. */ /* PROGRAM SUBROUTINES - */ /* MMDELM, MMDINT, MMDNUM, MMDUPD. 
*/
/* *************************************************************** */

/*
 * Driver of the multiple minimum external degree ordering (see the
 * parameter description above).  All array arguments are shifted by one so
 * that the body can use 1-based subscripts.
 *
 * The locals are automatic (the f2c translation declared them static);
 * each of them is assigned before it is read, so no state has to survive
 * between calls and the routine does not share data between callers.
 *
 * Always returns 0.
 */
/* Subroutine */ int genmmd_dist_(int_t *neqns, int_t *xadj, int_t *adjncy,
	int_t *invp, int_t *perm, int_t *delta, int_t *dhead,
	int_t *qsize, int_t *llist, int_t *marker, int_t *maxint,
	int_t *nofsub)
{
    /* Local variables */
    int_t mdeg, ehead, i, mdlmt, mdnode;
    int_t nextmd, tag, num;
    extern /* Subroutine */ int mmdelm_dist(int_t *, int_t *, int_t *,
	    int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *,
	    int_t *),
	mmdupd_dist(int_t *, int_t *, int_t *, int_t *, int_t *,
	    int_t *, int_t *, int_t *, int_t *, int_t *, int_t *,
	    int_t *, int_t *, int_t *),
	mmdint_dist(int_t *, int_t *, int_t *, int_t *, int_t *,
	    int_t *, int_t *, int_t *, int_t *),
	mmdnum_dist(int_t *, int_t *, int_t *, int_t *);

    /* Parameter adjustments */
    --marker;
    --llist;
    --qsize;
    --dhead;
    --perm;
    --invp;
    --adjncy;
    --xadj;

    /* Function Body */
    if (*neqns <= 0) {
	return 0;
    }

/*        ------------------------------------------------ */
/*        INITIALIZATION FOR THE MINIMUM DEGREE ALGORITHM. */
/*        ------------------------------------------------ */
    *nofsub = 0;
    mmdint_dist(neqns, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1],
	    &qsize[1], &llist[1], &marker[1]);

/*        ---------------------------------------------- */
/*        NUM COUNTS THE NUMBER OF ORDERED NODES PLUS 1. */
/*        ---------------------------------------------- */
    num = 1;

/*        ----------------------------- */
/*        ELIMINATE ALL ISOLATED NODES. */
/*        ----------------------------- */
    nextmd = dhead[1];
    while (nextmd > 0) {
	mdnode = nextmd;
	nextmd = invp[mdnode];
	marker[mdnode] = *maxint;
	invp[mdnode] = -num;
	++num;
    }

/*        ---------------------------------------- */
/*        SEARCH FOR NODE OF THE MINIMUM DEGREE. */
/*        MDEG IS THE CURRENT MINIMUM DEGREE; */
/*        TAG IS USED TO FACILITATE MARKING NODES. */
/*        ---------------------------------------- */
    if (num > *neqns) {
	goto L1000;
    }
    tag = 1;
    dhead[1] = 0;
    mdeg = 2;
L300:
    while (dhead[mdeg] <= 0) {
	++mdeg;
    }

/*            ------------------------------------------------- */
/*            USE VALUE OF DELTA TO SET UP MDLMT, WHICH GOVERNS */
/*            WHEN A DEGREE UPDATE IS TO BE PERFORMED. */
/*            ------------------------------------------------- */
    mdlmt = mdeg + *delta;
    ehead = 0;

L500:
    mdnode = dhead[mdeg];
    if (mdnode > 0) {
	goto L600;
    }
    ++mdeg;
    if (mdeg > mdlmt) {
	goto L900;
    }
    goto L500;
L600:
/*                ---------------------------------------- */
/*                REMOVE MDNODE FROM THE DEGREE STRUCTURE. */
/*                ---------------------------------------- */
    nextmd = invp[mdnode];
    dhead[mdeg] = nextmd;
    if (nextmd > 0) {
	perm[nextmd] = -mdeg;
    }
    invp[mdnode] = -num;
    *nofsub = *nofsub + mdeg + qsize[mdnode] - 2;
    if (num + qsize[mdnode] > *neqns) {
	goto L1000;
    }
/*                ---------------------------------------------- */
/*                ELIMINATE MDNODE AND PERFORM QUOTIENT GRAPH */
/*                TRANSFORMATION.  RESET TAG VALUE IF NECESSARY. */
/*                ---------------------------------------------- */
    ++tag;
    if (tag >= *maxint) {
	tag = 1;
	for (i = 1; i <= *neqns; ++i) {
	    if (marker[i] < *maxint) {
		marker[i] = 0;
	    }
	}
    }
    mmdelm_dist(&mdnode, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1],
	    &qsize[1], &llist[1], &marker[1], maxint, &tag);
    num += qsize[mdnode];
    llist[mdnode] = ehead;
    ehead = mdnode;
    if (*delta >= 0) {
	goto L500;
    }
L900:
/*            ------------------------------------------- */
/*            UPDATE DEGREES OF THE NODES INVOLVED IN THE */
/*            MINIMUM DEGREE NODES ELIMINATION. */
/*            ------------------------------------------- */
    if (num > *neqns) {
	goto L1000;
    }
    mmdupd_dist(&ehead, neqns, &xadj[1], &adjncy[1], delta, &mdeg, &dhead[1],
	    &invp[1], &perm[1], &qsize[1], &llist[1], &marker[1], maxint,
	    &tag);
    goto L300;

L1000:
    mmdnum_dist(neqns, &perm[1], &invp[1], &qsize[1]);
    return 0;

} /* genmmd_dist_ */

/* *************************************************************** */
/* *************************************************************** */
/* ***     MMDINT ..... MULT MINIMUM DEGREE INITIALIZATION     *** */
/* *************************************************************** */
/* *************************************************************** */

/*     AUTHOR - JOSEPH W.H. LIU */
/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */

/*     PURPOSE - THIS ROUTINE PERFORMS INITIALIZATION FOR THE */
/*        MULTIPLE ELIMINATION VERSION OF THE MINIMUM DEGREE */
/*        ALGORITHM. */

/*     INPUT PARAMETERS - */
/*        NEQNS  - NUMBER OF EQUATIONS. */
/*        (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */

/*     OUTPUT PARAMETERS - */
/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
/*        QSIZE  - SIZE OF SUPERNODE (INITIALIZED TO ONE). */
/*        LLIST  - LINKED LIST. */
/*        MARKER - MARKER VECTOR. 
*/

/* *************************************************************** */

/*
 * Initialise the work arrays of the multiple minimum degree ordering.
 * For every node 1..*neqns: DHEAD, MARKER and LLIST are cleared and QSIZE
 * is set to 1; each node is then pushed on the front of the doubly linked
 * list of its initial degree bucket, whose index is
 * xadj[node+1] - xadj[node] + 1.
 *
 * The locals are automatic (the f2c translation declared them static), so
 * the routine keeps no state between calls.  Always returns 0.
 */
/* Subroutine */ int mmdint_dist(int_t *neqns, int_t *xadj, int_t *adjncy,
	int_t *dhead, int_t *dforw, int_t *dbakw, int_t *qsize,
	int_t *llist, int_t *marker)
{
    /* Local variables */
    int_t ndeg, node, fnode;
    const int_t nnodes = *neqns;

    /* Parameter adjustments (1-based subscripts below) */
    --marker;
    --llist;
    --qsize;
    --dbakw;
    --dforw;
    --dhead;
    --adjncy;
    --xadj;

    /* Function Body */
    for (node = 1; node <= nnodes; ++node) {
	dhead[node] = 0;
	qsize[node] = 1;
	marker[node] = 0;
	llist[node] = 0;
    }

/*        ------------------------------------------ */
/*        INITIALIZE THE DEGREE DOUBLY LINKED LISTS. */
/*        ------------------------------------------ */
    for (node = 1; node <= nnodes; ++node) {
	ndeg = xadj[node + 1] - xadj[node] + 1;
	fnode = dhead[ndeg];
	dforw[node] = fnode;
	dhead[ndeg] = node;
	if (fnode > 0) {
	    dbakw[fnode] = node;
	}
	/* A negative back link marks the head of bucket ndeg. */
	dbakw[node] = -ndeg;
    }
    return 0;

} /* mmdint_dist */

/* *************************************************************** */
/* *************************************************************** */
/* **     MMDELM ..... MULTIPLE MINIMUM DEGREE ELIMINATION     *** */
/* *************************************************************** */
/* *************************************************************** */

/*     AUTHOR - JOSEPH W.H. LIU */
/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */

/*     PURPOSE - THIS ROUTINE ELIMINATES THE NODE MDNODE OF */
/*        MINIMUM DEGREE FROM THE ADJACENCY STRUCTURE, WHICH */
/*        IS STORED IN THE QUOTIENT GRAPH FORMAT.  IT ALSO */
/*        TRANSFORMS THE QUOTIENT GRAPH REPRESENTATION OF THE */
/*        ELIMINATION GRAPH. */

/*     INPUT PARAMETERS - */
/*        MDNODE - NODE OF MINIMUM DEGREE. */
/*        MAXINT - ESTIMATE OF MAXIMUM REPRESENTABLE (SHORT) */
/*                 INT. */
/*        TAG    - TAG VALUE. 
*/

/*     UPDATED PARAMETERS - */
/*        (XADJ,ADJNCY) - UPDATED ADJACENCY STRUCTURE. */
/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
/*        QSIZE  - SIZE OF SUPERNODE. */
/*        MARKER - MARKER VECTOR. */
/*        LLIST  - TEMPORARY LINKED LIST OF ELIMINATED NABORS. */

/* *************************************************************** */

/*
 * Eliminate node *mdnode from the quotient graph (see the parameter
 * description above).  The three phases below
 *   1. split the neighbours of MDNODE into uneliminated nodes (kept in
 *      place in ADJNCY) and eliminated ones (chained through LLIST),
 *   2. merge in the nodes reachable through those eliminated neighbours,
 *   3. update every node of the resulting reachable set.
 * A negative ADJNCY entry -e is a link: the list continues in the storage
 * of node e; a zero entry terminates a list.
 *
 * The locals are automatic (the f2c translation declared them static);
 * each is assigned before it is read.  Always returns 0.
 */
/* Subroutine */ int mmdelm_dist(int_t *mdnode, int_t *xadj, int_t *adjncy,
	int_t *dhead, int_t *dforw, int_t *dbakw, int_t *qsize,
	int_t *llist, int_t *marker, int_t *maxint, int_t *tag)
{
    /* Local variables */
    int_t node, link, rloc, rlmt, i, j, nabor, rnode, elmnt, xqnbr,
	    istop, jstop, istrt, jstrt, nxnode, pvnode, nqnbrs;

/*        ----------------------------------------------- */
/*        FIND REACHABLE SET AND PLACE IN DATA STRUCTURE. */
/*        ----------------------------------------------- */
    /* Parameter adjustments */
    --marker;
    --llist;
    --qsize;
    --dbakw;
    --dforw;
    --dhead;
    --adjncy;
    --xadj;

    /* Function Body */
    marker[*mdnode] = *tag;
    istrt = xadj[*mdnode];
    istop = xadj[*mdnode + 1] - 1;
/*        ------------------------------------------------------- */
/*        ELMNT POINTS TO THE BEGINNING OF THE LIST OF ELIMINATED */
/*        NABORS OF MDNODE, AND RLOC GIVES THE STORAGE LOCATION */
/*        FOR THE NEXT REACHABLE NODE. */
/*        ------------------------------------------------------- */
    elmnt = 0;
    rloc = istrt;
    rlmt = istop;
    for (i = istrt; i <= istop; ++i) {
	nabor = adjncy[i];
	if (nabor == 0) {
	    break;
	}
	if (marker[nabor] >= *tag) {
	    continue;
	}
	marker[nabor] = *tag;
	if (dforw[nabor] < 0) {
	    /* eliminated neighbour: push on the element list */
	    llist[nabor] = elmnt;
	    elmnt = nabor;
	} else {
	    adjncy[rloc] = nabor;
	    ++rloc;
	}
    }

/*            ----------------------------------------------------- */
/*            MERGE WITH REACHABLE NODES FROM GENERALIZED ELEMENTS. */
/*            ----------------------------------------------------- */
    while (elmnt > 0) {
	adjncy[rlmt] = -elmnt;
	link = elmnt;
L400:
	jstrt = xadj[link];
	jstop = xadj[link + 1] - 1;
	for (j = jstrt; j <= jstop; ++j) {
	    node = adjncy[j];
	    link = -node;
	    if (node < 0) {
		goto L400;	/* follow the link to the next segment */
	    }
	    if (node == 0) {
		break;
	    }
	    if (marker[node] >= *tag || dforw[node] < 0) {
		continue;
	    }
	    marker[node] = *tag;
/*                            --------------------------------- */
/*                            USE STORAGE FROM ELIMINATED NODES */
/*                            IF NECESSARY. */
/*                            --------------------------------- */
	    while (rloc >= rlmt) {
		link = -adjncy[rlmt];
		rloc = xadj[link];
		rlmt = xadj[link + 1] - 1;
	    }
	    adjncy[rloc] = node;
	    ++rloc;
	}
	elmnt = llist[elmnt];
    }

    if (rloc <= rlmt) {
	adjncy[rloc] = 0;
    }
/*        -------------------------------------------------------- */
/*        FOR EACH NODE IN THE REACHABLE SET, DO THE FOLLOWING ... */
/*        -------------------------------------------------------- */
    link = *mdnode;
L1100:
    istrt = xadj[link];
    istop = xadj[link + 1] - 1;
    for (i = istrt; i <= istop; ++i) {
	rnode = adjncy[i];
	link = -rnode;
	if (rnode < 0) {
	    goto L1100;		/* follow the link to the next segment */
	}
	if (rnode == 0) {
	    break;
	}
/*                -------------------------------------------- */
/*                IF RNODE IS IN THE DEGREE LIST STRUCTURE ... */
/*                -------------------------------------------- */
	pvnode = dbakw[rnode];
	if (pvnode != 0 && pvnode != -(*maxint)) {
/*                    ------------------------------------- */
/*                    THEN REMOVE RNODE FROM THE STRUCTURE. */
/*                    ------------------------------------- */
	    nxnode = dforw[rnode];
	    if (nxnode > 0) {
		dbakw[nxnode] = pvnode;
	    }
	    if (pvnode > 0) {
		dforw[pvnode] = nxnode;
	    }
	    if (pvnode < 0) {
		dhead[-pvnode] = nxnode;
	    }
	}
/*                ---------------------------------------- */
/*                PURGE INACTIVE QUOTIENT NABORS OF RNODE. */
/*                ---------------------------------------- */
	jstrt = xadj[rnode];
	jstop = xadj[rnode + 1] - 1;
	xqnbr = jstrt;
	for (j = jstrt; j <= jstop; ++j) {
	    nabor = adjncy[j];
	    if (nabor == 0) {
		break;
	    }
	    if (marker[nabor] >= *tag) {
		continue;
	    }
	    adjncy[xqnbr] = nabor;
	    ++xqnbr;
	}
/*                ---------------------------------------- */
/*                IF NO ACTIVE NABOR AFTER THE PURGING ... */
/*                ---------------------------------------- */
	nqnbrs = xqnbr - jstrt;
	if (nqnbrs <= 0) {
/*                    ----------------------------- */
/*                    THEN MERGE RNODE WITH MDNODE. */
/*                    ----------------------------- */
	    qsize[*mdnode] += qsize[rnode];
	    qsize[rnode] = 0;
	    marker[rnode] = *maxint;
	    dforw[rnode] = -(*mdnode);
	    dbakw[rnode] = -(*maxint);
	} else {
/*                -------------------------------------- */
/*                ELSE FLAG RNODE FOR DEGREE UPDATE, AND */
/*                ADD MDNODE AS A NABOR OF RNODE. */
/*                -------------------------------------- */
	    dforw[rnode] = nqnbrs + 1;
	    dbakw[rnode] = 0;
	    adjncy[xqnbr] = *mdnode;
	    ++xqnbr;
	    if (xqnbr <= jstop) {
		adjncy[xqnbr] = 0;
	    }
	}
    }
    return 0;

} /* mmdelm_dist */

/* *************************************************************** */
/* *************************************************************** */
/* *****     MMDUPD ..... MULTIPLE MINIMUM DEGREE UPDATE     ***** */
/* *************************************************************** */
/* *************************************************************** */

/*     AUTHOR - JOSEPH W.H. LIU */
/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */

/*     PURPOSE - THIS ROUTINE UPDATES THE DEGREES OF NODES */
/*        AFTER A MULTIPLE ELIMINATION STEP. */

/*     INPUT PARAMETERS - */
/*        EHEAD  - THE BEGINNING OF THE LIST OF ELIMINATED */
/*                 NODES (I.E., NEWLY FORMED ELEMENTS). */
/*        NEQNS  - NUMBER OF EQUATIONS. */
/*        (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
/*        DELTA  - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
/*        MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) */
/*                 INTEGER. 
*/

/*     UPDATED PARAMETERS - */
/*        MDEG   - NEW MINIMUM DEGREE AFTER DEGREE UPDATE. */
/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
/*        QSIZE  - SIZE OF SUPERNODE. */
/*        LLIST  - WORKING LINKED LIST. */
/*        MARKER - MARKER VECTOR FOR DEGREE UPDATE. */
/*        TAG    - TAG VALUE. */

/* *************************************************************** */

/*
 * Recompute the external degree of the nodes affected by the elements on
 * the list starting at *ehead (see the parameter description above) and
 * re-insert them in the degree lists.  For each element, its nodes flagged
 * for update (DBAKW == 0) are split into those with DFORW == 2 (Q2 list)
 * and the rest (QX list); the two lists are processed in turn, IQ2 telling
 * which one is active.
 *
 * The locals are automatic (the f2c translation declared them static);
 * each is assigned before it is read.  Always returns 0.
 */
/* Subroutine */ int mmdupd_dist(int_t *ehead, int_t *neqns, int_t *xadj,
	int_t *adjncy, int_t *delta, int_t *mdeg, int_t *dhead,
	int_t *dforw, int_t *dbakw, int_t *qsize, int_t *llist,
	int_t *marker, int_t *maxint, int_t *tag)
{
    /* Local variables */
    int_t node, mtag, link, mdeg0, i, j, enode, fnode, nabor, elmnt,
	    istop, jstop, q2head, istrt, jstrt, qxhead, iq2, deg, deg0;

    /* Parameter adjustments */
    --marker;
    --llist;
    --qsize;
    --dbakw;
    --dforw;
    --dhead;
    --adjncy;
    --xadj;

    /* Function Body */
    mdeg0 = *mdeg + *delta;
    elmnt = *ehead;
L100:
/*            ------------------------------------------------------- */
/*            FOR EACH OF THE NEWLY FORMED ELEMENT, DO THE FOLLOWING. */
/*            (RESET TAG VALUE IF NECESSARY.) */
/*            ------------------------------------------------------- */
    if (elmnt <= 0) {
	return 0;
    }
    mtag = *tag + mdeg0;
    if (mtag >= *maxint) {
	*tag = 1;
	for (i = 1; i <= *neqns; ++i) {
	    if (marker[i] < *maxint) {
		marker[i] = 0;
	    }
	}
	mtag = *tag + mdeg0;
    }
/*            --------------------------------------------- */
/*            CREATE TWO LINKED LISTS FROM NODES ASSOCIATED */
/*            WITH ELMNT: ONE WITH TWO NABORS (Q2HEAD) IN */
/*            ADJACENCY STRUCTURE, AND THE OTHER WITH MORE */
/*            THAN TWO NABORS (QXHEAD).  ALSO COMPUTE DEG0, */
/*            NUMBER OF NODES IN THIS ELEMENT. */
/*            --------------------------------------------- */
    q2head = 0;
    qxhead = 0;
    deg0 = 0;
    link = elmnt;
L400:
    istrt = xadj[link];
    istop = xadj[link + 1] - 1;
    for (i = istrt; i <= istop; ++i) {
	enode = adjncy[i];
	link = -enode;
	if (enode < 0) {
	    goto L400;		/* follow the link to the next segment */
	}
	if (enode == 0) {
	    break;
	}
	if (qsize[enode] == 0) {
	    continue;
	}
	deg0 += qsize[enode];
	marker[enode] = mtag;
/*                        ---------------------------------- */
/*                        IF ENODE REQUIRES A DEGREE UPDATE, */
/*                        THEN DO THE FOLLOWING. */
/*                        ---------------------------------- */
	if (dbakw[enode] != 0) {
	    continue;
	}
/*                            --------------------------------------- */
/*                            PLACE EITHER IN QXHEAD OR Q2HEAD LISTS. */
/*                            --------------------------------------- */
	if (dforw[enode] == 2) {
	    llist[enode] = q2head;
	    q2head = enode;
	} else {
	    llist[enode] = qxhead;
	    qxhead = enode;
	}
    }
/*            -------------------------------------------- */
/*            FOR EACH ENODE IN Q2 LIST, DO THE FOLLOWING. */
/*            -------------------------------------------- */
    enode = q2head;
    iq2 = 1;
L900:
    if (enode <= 0) {
	goto L1500;
    }
    if (dbakw[enode] != 0) {
	goto L2200;
    }
    ++(*tag);
    deg = deg0;
/*                    ------------------------------------------ */
/*                    IDENTIFY THE OTHER ADJACENT ELEMENT NABOR. */
/*                    ------------------------------------------ */
    istrt = xadj[enode];
    nabor = adjncy[istrt];
    if (nabor == elmnt) {
	nabor = adjncy[istrt + 1];
    }
/*                    ------------------------------------------------ */
/*                    IF NABOR IS UNELIMINATED, INCREASE DEGREE COUNT. */
/*                    ------------------------------------------------ */
    link = nabor;
    if (dforw[nabor] >= 0) {
	deg += qsize[nabor];
	goto L2100;
    }
L1000:
/*                        -------------------------------------------- */
/*                        OTHERWISE, FOR EACH NODE IN THE 2ND ELEMENT, */
/*                        DO THE FOLLOWING. */
/*                        -------------------------------------------- */
    istrt = xadj[link];
    istop = xadj[link + 1] - 1;
    for (i = istrt; i <= istop; ++i) {
	node = adjncy[i];
	link = -node;
	if (node == enode) {
	    continue;
	}
	if (node < 0) {
	    goto L1000;		/* follow the link to the next segment */
	}
	if (node == 0) {
	    goto L2100;
	}
	if (qsize[node] == 0) {
	    continue;
	}
	if (marker[node] < *tag) {
/*                                ------------------------------------- */
/*                                CASE WHEN NODE IS NOT YET CONSIDERED. */
/*                                ------------------------------------- */
	    marker[node] = *tag;
	    deg += qsize[node];
	    continue;
	}
/*                            ---------------------------------------- */
/*                            CASE WHEN NODE IS INDISTINGUISHABLE FROM */
/*                            ENODE.  MERGE THEM INTO A NEW SUPERNODE. */
/*                            ---------------------------------------- */
	if (dbakw[node] != 0) {
	    continue;
	}
	if (dforw[node] == 2) {
	    qsize[enode] += qsize[node];
	    qsize[node] = 0;
	    marker[node] = *maxint;
	    dforw[node] = -enode;
	    dbakw[node] = -(*maxint);
	} else {
/*                            -------------------------------------- */
/*                            CASE WHEN NODE IS OUTMATCHED BY ENODE. */
/*                            -------------------------------------- */
	    /* dbakw[node] is known to be 0 here (checked just above). */
	    dbakw[node] = -(*maxint);
	}
    }
    goto L2100;

L1500:
/*                ------------------------------------------------ */
/*                FOR EACH ENODE IN THE QX LIST, DO THE FOLLOWING. */
/*                ------------------------------------------------ */
    enode = qxhead;
    iq2 = 0;
L1600:
    if (enode <= 0) {
	goto L2300;
    }
    if (dbakw[enode] != 0) {
	goto L2200;
    }
    ++(*tag);
    deg = deg0;
/*                        --------------------------------- */
/*                        FOR EACH UNMARKED NABOR OF ENODE, */
/*                        DO THE FOLLOWING. */
/*                        --------------------------------- */
    istrt = xadj[enode];
    istop = xadj[enode + 1] - 1;
    for (i = istrt; i <= istop; ++i) {
	nabor = adjncy[i];
	if (nabor == 0) {
	    break;
	}
	if (marker[nabor] >= *tag) {
	    continue;
	}
	marker[nabor] = *tag;
	link = nabor;
/*                                ------------------------------ */
/*                                IF UNELIMINATED, INCLUDE IT IN */
/*                                DEG COUNT. */
/*                                ------------------------------ */
	if (dforw[nabor] >= 0) {
	    deg += qsize[nabor];
	    continue;
	}
L1700:
/*                                    ------------------------------- */
/*                                    IF ELIMINATED, INCLUDE UNMARKED */
/*                                    NODES IN THIS ELEMENT INTO THE */
/*                                    DEGREE COUNT. */
/*                                    ------------------------------- */
	jstrt = xadj[link];
	jstop = xadj[link + 1] - 1;
	for (j = jstrt; j <= jstop; ++j) {
	    node = adjncy[j];
	    link = -node;
	    if (node < 0) {
		goto L1700;	/* follow the link to the next segment */
	    }
	    if (node == 0) {
		break;
	    }
	    if (marker[node] >= *tag) {
		continue;
	    }
	    marker[node] = *tag;
	    deg += qsize[node];
	}
    }
L2100:
/*                    ------------------------------------------- */
/*                    UPDATE EXTERNAL DEGREE OF ENODE IN DEGREE */
/*                    STRUCTURE, AND MDEG (MIN DEG) IF NECESSARY. */
/*                    ------------------------------------------- */
    deg = deg - qsize[enode] + 1;
    fnode = dhead[deg];
    dforw[enode] = fnode;
    dbakw[enode] = -deg;
    if (fnode > 0) {
	dbakw[fnode] = enode;
    }
    dhead[deg] = enode;
    if (deg < *mdeg) {
	*mdeg = deg;
    }
L2200:
/*                    ---------------------------------- */
/*                    GET NEXT ENODE IN CURRENT ELEMENT. */
/*                    ---------------------------------- */
    enode = llist[enode];
    if (iq2 == 1) {
	goto L900;
    }
    goto L1600;
L2300:
/*                ----------------------------- */
/*                GET NEXT ELEMENT IN THE LIST. */
/*                ----------------------------- */
    *tag = mtag;
    elmnt = llist[elmnt];
    goto L100;

} /* mmdupd_dist */

/* *************************************************************** */
/* *************************************************************** */
/* *****     MMDNUM ..... MULTI MINIMUM DEGREE NUMBERING     ***** */
/* *************************************************************** */
/* *************************************************************** */

/*     AUTHOR - JOSEPH W.H. LIU */
/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. 
*/

/*     PURPOSE - THIS ROUTINE PERFORMS THE FINAL STEP IN */
/*        PRODUCING THE PERMUTATION AND INVERSE PERMUTATION */
/*        VECTORS IN THE MULTIPLE ELIMINATION VERSION OF THE */
/*        MINIMUM DEGREE ORDERING ALGORITHM. */

/*     INPUT PARAMETERS - */
/*        NEQNS  - NUMBER OF EQUATIONS. */
/*        QSIZE  - SIZE OF SUPERNODES AT ELIMINATION. */

/*     UPDATED PARAMETERS - */
/*        INVP   - INVERSE PERMUTATION VECTOR.  ON INPUT, */
/*                 IF QSIZE(NODE)=0, THEN NODE HAS BEEN MERGED */
/*                 INTO THE NODE -INVP(NODE); OTHERWISE, */
/*                 -INVP(NODE) IS ITS INVERSE LABELLING. */

/*     OUTPUT PARAMETERS - */
/*        PERM   - THE PERMUTATION VECTOR. */

/* *************************************************************** */

/*
 * Turn the merge forest left in INVP into the final ordering.  PERM is
 * used as scratch during the first two passes: a positive entry is the
 * last number handed out in that root's supernode, a non-positive entry
 * is minus the node this one was merged into.
 *
 * The locals are automatic (the f2c translation declared them static), so
 * the routine keeps no state between calls.  Always returns 0.
 */
/* Subroutine */ int mmdnum_dist(int_t *neqns, int_t *perm, int_t *invp,
	int_t *qsize)
{
    /* Local variables */
    int_t node, root, nextf, father, nqsize, num;
    const int_t nnodes = *neqns;

    /* Parameter adjustments (1-based subscripts below) */
    --qsize;
    --invp;
    --perm;

    /* Function Body */
    for (node = 1; node <= nnodes; ++node) {
	nqsize = qsize[node];
	if (nqsize <= 0) {
	    perm[node] = invp[node];
	}
	if (nqsize > 0) {
	    perm[node] = -invp[node];
	}
    }
/*        ------------------------------------------------------ */
/*        FOR EACH NODE WHICH HAS BEEN MERGED, DO THE FOLLOWING. */
/*        ------------------------------------------------------ */
    for (node = 1; node <= nnodes; ++node) {
	if (perm[node] > 0) {
	    continue;
	}
/*                ----------------------------------------- */
/*                TRACE THE MERGED TREE UNTIL ONE WHICH HAS */
/*                NOT BEEN MERGED, CALL IT ROOT. */
/*                ----------------------------------------- */
	father = node;
	while (perm[father] <= 0) {
	    father = -perm[father];
	}
/*                ----------------------- */
/*                NUMBER NODE AFTER ROOT. */
/*                ----------------------- */
	root = father;
	num = perm[root] + 1;
	invp[node] = -num;
	perm[root] = num;
/*                ------------------------ */
/*                SHORTEN THE MERGED TREE. */
/*                ------------------------ */
	father = node;
	for (;;) {
	    nextf = -perm[father];
	    if (nextf <= 0) {
		break;
	    }
	    perm[father] = -root;
	    father = nextf;
	}
    }
/*        ---------------------- */
/*        READY TO COMPUTE PERM. */
/*        ---------------------- */
    for (node = 1; node <= nnodes; ++node) {
	num = -invp[node];
	invp[node] = num;
	perm[num] = node;
    }
    return 0;

} /* mmdnum_dist */
SuperLU_DIST_5.3.0/SRC/pzgstrs_lsum.c0000644013363400111340000003110513233431301016203 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Perform local block modifications: lsum[i] -= L_i,k * X[k] * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 * 
*/ #include "superlu_zdefs.h" #define ISEND_IRECV /* * Function prototypes */ #ifdef _CRAY fortran void CTRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*); fortran void CGEMM(_fcd, _fcd, int*, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*, doublecomplex*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   Perform local block modifications: lsum[i] -= L_i,k * X[k].
 * 
*/ void zlsum_fmod /************************************************************************/ ( doublecomplex *lsum, /* Sum of local modifications. */ doublecomplex *x, /* X array (local) */ doublecomplex *xk, /* X[k]. */ doublecomplex *rtemp, /* Result of full matrix-vector multiply. */ int nrhs, /* Number of right-hand sides. */ int knsupc, /* Size of supernode k. */ int_t k, /* The k-th component of X. */ int_t *fmod, /* Modification count for L-solve. */ int_t nlb, /* Number of L blocks. */ int_t lptr, /* Starting position in lsub[*]. */ int_t luptr, /* Starting position in lusup[*]. */ int_t *xsup, gridinfo_t *grid, LocalLU_t *Llu, MPI_Request send_req[], /* input/output */ SuperLUStat_t *stat ) { doublecomplex alpha = {1.0, 0.0}, beta = {0.0, 0.0}; doublecomplex *lusup, *lusup1; doublecomplex *dest; int iam, iknsupc, myrow, nbrow, nsupr, nsupr1, p, pi; int_t i, ii, ik, il, ikcol, irow, j, lb, lk, rel; int_t *lsub, *lsub1, nlb1, lptr1, luptr1; int_t *ilsum = Llu->ilsum; /* Starting position of each supernode in lsum. */ int_t *frecv = Llu->frecv; int_t **fsendx_plist = Llu->fsendx_plist; MPI_Status status; int test_flag; iam = grid->iam; myrow = MYROW( iam, grid ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Llu->Lrowind_bc_ptr[lk]; lusup = Llu->Lnzval_bc_ptr[lk]; nsupr = lsub[1]; for (lb = 0; lb < nlb; ++lb) { ik = lsub[lptr]; /* Global block number, row-wise. */ nbrow = lsub[lptr+1]; #ifdef _CRAY CGEMM( ftcs2, ftcs2, &nbrow, &nrhs, &knsupc, &alpha, &lusup[luptr], &nsupr, xk, &knsupc, &beta, rtemp, &nbrow ); #elif defined (USE_VENDOR_BLAS) zgemm_( "N", "N", &nbrow, &nrhs, &knsupc, &alpha, &lusup[luptr], &nsupr, xk, &knsupc, &beta, rtemp, &nbrow, 1, 1 ); #else zgemm_( "N", "N", &nbrow, &nrhs, &knsupc, &alpha, &lusup[luptr], &nsupr, xk, &knsupc, &beta, rtemp, &nbrow ); #endif stat->ops[SOLVE] += 8 * nbrow * nrhs * knsupc + 2 * nbrow * nrhs; lk = LBi( ik, grid ); /* Local block number, row-wise. 
*/ iknsupc = SuperSize( ik ); il = LSUM_BLK( lk ); dest = &lsum[il]; lptr += LB_DESCRIPTOR; rel = xsup[ik]; /* Global row index of block ik. */ for (i = 0; i < nbrow; ++i) { irow = lsub[lptr++] - rel; /* Relative row. */ RHS_ITERATE(j) z_sub(&dest[irow + j*iknsupc], &dest[irow + j*iknsupc], &rtemp[i + j*nbrow]); } luptr += nbrow; if ( (--fmod[lk])==0 ) { /* Local accumulation done. */ ikcol = PCOL( ik, grid ); p = PNUM( myrow, ikcol, grid ); if ( iam != p ) { #ifdef ISEND_IRECV MPI_Isend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else #ifdef BSEND MPI_Bsend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm ); #else MPI_Send( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent LSUM[%2.0f], size %2d, to P %2d\n", iam, lsum[il-LSUM_H], iknsupc*nrhs+LSUM_H, p); #endif } else { /* Diagonal process: X[i] += lsum[i]. */ ii = X_BLK( lk ); RHS_ITERATE(j) for (i = 0; i < iknsupc; ++i) z_add(&x[i + ii + j*iknsupc], &x[i + ii + j*iknsupc], &lsum[i + il + j*iknsupc]); if ( frecv[lk]==0 ) { /* Becomes a leaf node. */ fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( ik, grid );/* Local block number, column-wise. */ lsub1 = Llu->Lrowind_bc_ptr[lk]; lusup1 = Llu->Lnzval_bc_ptr[lk]; nsupr1 = lsub1[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &iknsupc, &nrhs, &alpha, lusup1, &nsupr1, &x[ii], &iknsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "L", "N", "U", &iknsupc, &nrhs, &alpha, lusup1, &nsupr1, &x[ii], &iknsupc, 1, 1, 1, 1); #else ztrsm_("L", "L", "N", "U", &iknsupc, &nrhs, &alpha, lusup1, &nsupr1, &x[ii], &iknsupc); #endif stat->ops[SOLVE] += 4 * iknsupc * (iknsupc - 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, ik); #endif /* * Send Xk to process column Pc[k]. 
*/ for (p = 0; p < grid->nprow; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, ikcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications. */ nlb1 = lsub1[0] - 1; lptr1 = BC_HEADER + LB_DESCRIPTOR + iknsupc; luptr1 = iknsupc; /* Skip diagonal block L(I,I). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, iknsupc, ik, fmod, nlb1, lptr1, luptr1, xsup, grid, Llu, send_req, stat); } /* if frecv[lk] == 0 */ } /* if iam == p */ } /* if fmod[lk] == 0 */ } /* for lb ... */ } /* zLSUM_FMOD */

/************************************************************************/
void zlsum_bmod
/************************************************************************/
(
 doublecomplex *lsum,        /* Sum of local modifications.                    */
 doublecomplex *x,           /* X array (local).                               */
 doublecomplex *xk,          /* X[k].                                          */
 int    nrhs,                /* Number of right-hand sides.                    */
 int_t  k,                   /* The k-th component of X.                       */
 int_t  *bmod,               /* Modification count for U-solve.                */
 int_t  *Urbs,               /* Number of row blocks in each block column of U.*/
 Ucb_indptr_t **Ucb_indptr,  /* Vertical linked list pointing to Uindex[].     */
 int_t  **Ucb_valptr,        /* Vertical linked list pointing to Unzval[].     */
 int_t  *xsup,
 gridinfo_t *grid,
 LocalLU_t *Llu,
 MPI_Request send_req[],     /* input/output */
 SuperLUStat_t *stat
 )
{
/*
 * Purpose
 * =======
 *   Perform local block modifications: lsum[i] -= U_i,k * X[k].
 *
 *   For every U block stored in local block column LBj(k), the product of
 *   the block with xk is subtracted from the matching segment of lsum and
 *   bmod[ik] is decremented.  When bmod[ik] reaches zero the accumulated
 *   segment is either sent with tag LSUM to process
 *   PNUM(myrow, PCOL(gik)), or, when this process is that one, added
 *   into x.  In the latter case, if brecv[ik] is also zero, X[gik] is solved
 *   with a triangular solve, sent with tag Xk to every process row p for
 *   which bsendx_plist[lk1][p] != EMPTY, and the routine recurses on gik.
 */
    doublecomplex alpha = {1.0, 0.0};
    int    iam, iknsupc, knsupc, myrow, nsupr, p, pi;
    int_t  fnz, gik, gikcol, i, ii, ik, ikfrow, iklrow, il, irow,
           j, jj, lk, lk1, nub, ub, uptr;
    int_t  *usub;
    doublecomplex *uval, *dest, *y;
    doublecomplex temp;
    int_t  *lsub;
    doublecomplex *lusup;
    int_t  *ilsum = Llu->ilsum; /* Starting position of each supernode in lsum. */
    int_t  *brecv = Llu->brecv;
    int_t  **bsendx_plist = Llu->bsendx_plist;

    iam = grid->iam;
    myrow = MYROW( iam, grid );
    knsupc = SuperSize( k );
    lk = LBj( k, grid ); /* Local block number, column-wise. */
    nub = Urbs[lk];      /* Number of U blocks in block column lk */

    for (ub = 0; ub < nub; ++ub) {
	ik = Ucb_indptr[lk][ub].lbnum; /* Local block number, row-wise. */
	usub = Llu->Ufstnz_br_ptr[ik];
	uval = Llu->Unzval_br_ptr[ik];
	i = Ucb_indptr[lk][ub].indpos; /* Start of the block in usub[]. */
	i += UB_DESCRIPTOR;
	il = LSUM_BLK( ik );
	gik = ik * grid->nprow + myrow;/* Global block number, row-wise. */
	iknsupc = SuperSize( gik );
	ikfrow = FstBlockC( gik );
	iklrow = FstBlockC( gik+1 );

	RHS_ITERATE(j) {
	    dest = &lsum[il + j*iknsupc];
	    y = &xk[j*knsupc];
	    uptr = Ucb_valptr[lk][ub]; /* Start of the block in uval[]. */
	    for (jj = 0; jj < knsupc; ++jj) {
		fnz = usub[i + jj];
		if ( fnz < iklrow ) { /* Nonzero segment. */
		    /* AXPY */
		    for (irow = fnz; irow < iklrow; ++irow) {
			zz_mult(&temp, &uval[uptr], &y[jj]);
			z_sub(&dest[irow - ikfrow], &dest[irow - ikfrow],
			      &temp);
			++uptr;
		    }
		    stat->ops[SOLVE] += 8 * (iklrow - fnz);
		}
	    } /* for jj ... */
	}

	if ( (--bmod[ik]) == 0 ) { /* Local accumulation done. */
	    gikcol = PCOL( gik, grid );
	    p = PNUM( myrow, gikcol, grid );
	    if ( iam != p ) {
#ifdef ISEND_IRECV
		MPI_Isend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			   SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm,
			   &send_req[Llu->SolveMsgSent++] );
#else
#ifdef BSEND
		MPI_Bsend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			   SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm );
#else
		MPI_Send( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			  SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm );
#endif
#endif
#if ( DEBUGlevel>=2 )
		printf("(%2d) Sent LSUM[%2.0f], size %2d, to P %2d\n",
		       iam, lsum[il-LSUM_H], iknsupc*nrhs+LSUM_H, p);
#endif
	    } else { /* Diagonal process: X[i] += lsum[i]. */
		ii = X_BLK( ik );
		dest = &x[ii];
		RHS_ITERATE(j)
		    for (i = 0; i < iknsupc; ++i)
			z_add(&dest[i + j*iknsupc], &dest[i + j*iknsupc],
			      &lsum[i + il + j*iknsupc]);

		if ( !brecv[ik] ) { /* Becomes a leaf node. */
		    bmod[ik] = -1; /* Do not solve X[k] in the future. */
		    lk1 = LBj( gik, grid ); /* Local block number. */
		    lsub = Llu->Lrowind_bc_ptr[lk1];
		    lusup = Llu->Lnzval_bc_ptr[lk1];
		    nsupr = lsub[1];
#ifdef _CRAY
		    CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &iknsupc, &nrhs, &alpha,
			  lusup, &nsupr, &x[ii], &iknsupc);
#elif defined (USE_VENDOR_BLAS)
		    ztrsm_("L", "U", "N", "N", &iknsupc, &nrhs, &alpha,
			   lusup, &nsupr, &x[ii], &iknsupc, 1, 1, 1, 1);
#else
		    ztrsm_("L", "U", "N", "N", &iknsupc, &nrhs, &alpha,
			   lusup, &nsupr, &x[ii], &iknsupc);
#endif
		    stat->ops[SOLVE] += 4 * iknsupc * (iknsupc + 1) * nrhs
			+ 10 * iknsupc * nrhs; /* complex division */
#if ( DEBUGlevel>=2 )
		    printf("(%2d) Solve X[%2d]\n", iam, gik);
#endif

		    /*
		     * Send Xk to process column Pc[k].
		     */
		    for (p = 0; p < grid->nprow; ++p) {
			if ( bsendx_plist[lk1][p] != EMPTY ) {
			    pi = PNUM( p, gikcol, grid );
#ifdef ISEND_IRECV
			    MPI_Isend( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				       SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm,
				       &send_req[Llu->SolveMsgSent++] );
#else
#ifdef BSEND
			    MPI_Bsend( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				       SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm );
#else
			    MPI_Send( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				      SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm );
#endif
#endif
#if ( DEBUGlevel>=2 )
			    printf("(%2d) Sent X[%2.0f] to P %2d\n",
				   iam, x[ii-XK_H], pi);
#endif
			}
		    }

		    /*
		     * Perform local block modifications.
		     */
		    if ( Urbs[lk1] )
			zlsum_bmod(lsum, x, &x[ii], nrhs, gik, bmod, Urbs,
				   Ucb_indptr, Ucb_valptr, xsup, grid, Llu,
				   send_req, stat);
		} /* if brecv[ik] == 0 */
	    }
	} /* if bmod[ik] == 0 */

    } /* for ub ... */

} /* zlSUM_BMOD */
SuperLU_DIST_5.3.0/SRC/smach_dist.c0000644013363400111340000000560213233431301015550 0ustar xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <string.h>

float smach_dist(char *cmach)
{
/*  -- SuperLU auxiliary routine (version 5.0) --
    This uses C99 standard constants, and is thread safe.

    Must be compiled with "-std=c99" flag.

    Purpose
    =======

    SMACH returns single precision machine parameters.

    Arguments
    =========

    CMACH   (input) CHARACTER*1
            Specifies the value to be returned by SMACH:
            = 'E' or 'e',   SMACH := eps
            = 'S' or 's',   SMACH := sfmin
            = 'B' or 'b',   SMACH := base
            = 'P' or 'p',   SMACH := eps*base
            = 'N' or 'n',   SMACH := t
            = 'R' or 'r',   SMACH := rnd
            = 'M' or 'm',   SMACH := emin
            = 'U' or 'u',   SMACH := rmin
            = 'L' or 'l',   SMACH := emax
            = 'O' or 'o',   SMACH := rmax

            where

            eps   = relative machine precision
            sfmin = safe minimum, such that 1/sfmin does not overflow
            base  = base of the machine
            prec  = eps*base
            t     = number of (base) digits in the mantissa
            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
            emin  = minimum exponent before (gradual) underflow
            rmin  = underflow threshold - base**(emin-1)
            emax  = largest exponent before overflow
            rmax  = overflow threshold  - (base**emax)*(1-eps)

    Only the first character of CMACH is examined.  Any other selector
    returns 0.0.

   =====================================================================
*/
    float sfmin, small;
    float rmach = 0.0f;   /* defined result for an unrecognized selector */

    switch ( cmach[0] ) {
      case 'E': case 'e':
	rmach = FLT_EPSILON * 0.5;
	break;
      case 'S': case 's':
	sfmin = FLT_MIN;
	small = 1. / FLT_MAX;
	if (small >= sfmin) {
	    /* Use SMALL plus a bit, to avoid the possibility of rounding
	       causing overflow when computing  1/sfmin. */
	    sfmin = small * (FLT_EPSILON*0.5 + 1.);
	}
	rmach = sfmin;
	break;
      case 'B': case 'b':
	rmach = FLT_RADIX;
	break;
      case 'P': case 'p':
	rmach = FLT_EPSILON * 0.5 * FLT_RADIX;
	break;
      case 'N': case 'n':
	rmach = FLT_MANT_DIG;
	break;
      case 'R': case 'r':
	rmach = FLT_ROUNDS;
	break;
      case 'M': case 'm':
	rmach = FLT_MIN_EXP;
	break;
      case 'U': case 'u':
	rmach = FLT_MIN;
	break;
      case 'L': case 'l':
	rmach = FLT_MAX_EXP;
	break;
      case 'O': case 'o':
	rmach = FLT_MAX;
	break;
      default:
	break;
    }

    return rmach;

} /* end smach_dist */
SuperLU_DIST_5.3.0/SRC/ddistribute.c0000644013363400111340000006205713233431301015763 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Distribute the matrix onto the 2D process mesh. * *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 * 
*/ #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *   Distribute the matrix onto the 2D process mesh.
 * 
 * Arguments
 * =========
 * 
 * fact (input) fact_t
 *        Specifies whether or not the L and U structures will be re-used.
 *        = SamePattern_SameRowPerm: L and U structures are input, and
 *                                   unchanged on exit.
 *        = DOFACT or SamePattern: L and U structures are computed and output.
 *
 * n      (input) int_t
 *        Dimension of the matrix.
 *
 * A      (input) SuperMatrix*
 *	  The original matrix A, permuted by columns, of dimension
 *        (A->nrow, A->ncol). The type of A can be:
 *        Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
 *
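 * LUstruct (input/output) LUstruct_t*
 *        Data structures for L and U factors. With
 *        fact = SamePattern_SameRowPerm the existing structures are read and
 *        their numerical values overwritten; otherwise the structures are
 *        allocated here and attached to LUstruct->Llu.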
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Return value
 * ============
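 *   Working storage required (in bytes). The count is accumulated only
 *   when the library is compiled with PRNTlevel>=1; otherwise 0 is returned.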
 * 
*/ float ddistribute(fact_t fact, int_t n, SuperMatrix *A, Glu_freeable_t *Glu_freeable, LUstruct_t *LUstruct, gridinfo_t *grid) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; int_t bnnz, fsupc, fsupc1, i, ii, irow, istart, j, jb, jj, k, len, len1, nsupc; int_t ljb; /* local block column number */ int_t nrbl; /* number of L blocks in current block column */ int_t nrbu; /* number of U blocks in current block column */ int_t gb; /* global block number; 0 < gb <= nsuper */ int_t lb; /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */ int iam, jbrow, kcol, mycol, myrow, pc, pr; int_t mybufmax[NBUFFERS]; NCPformat *Astore; double *a; int_t *asub; int_t *xa_begin, *xa_end; int_t *xsup = Glu_persist->xsup; /* supernode and column mapping */ int_t *supno = Glu_persist->supno; int_t *lsub, *xlsub, *usub, *xusub; int_t nsupers; int_t next_lind; /* next available position in index[*] */ int_t next_lval; /* next available position in nzval[*] */ int_t *index; /* indices consist of headers and row subscripts */ int *index1; /* temporary pointer to array of int */ double *lusup, *uval; /* nonzero values in L and U */ double **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc) */ int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */ double **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */ int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */ /*-- Counts to be used in factorization. --*/ int *ToRecv, *ToSendD, **ToSendR; /*-- Counts to be used in lower triangular solve. --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist; /* Column process list to send down Xk. */ int_t nfrecvx = 0; /* Number of Xk I will receive. */ int_t nfsendx = 0; /* Number of Xk I will send */ int_t kseen; /*-- Counts to be used in upper triangular solve. --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist; /* Column process list to send down Xk. */ int_t nbrecvx = 0; /* Number of Xk I will receive. 
*/ int_t nbsendx = 0; /* Number of Xk I will send */ int_t *ilsum; /* starting position of each supernode in the full array (local) */ /*-- Auxiliary arrays; freed on return --*/ int_t *rb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ int_t *Urb_length; /* U block length; size ceil(NSUPERS/Pr) */ int_t *Urb_indptr; /* pointers to U index[]; size ceil(NSUPERS/Pr) */ int_t *Urb_fstnz; /* # of fstnz in a block row; size ceil(NSUPERS/Pr) */ int_t *Ucbs; /* number of column blocks in a block row */ int_t *Lrb_length; /* L block length; size ceil(NSUPERS/Pr) */ int_t *Lrb_number; /* global block number; size ceil(NSUPERS/Pr) */ int_t *Lrb_indptr; /* pointers to L index[]; size ceil(NSUPERS/Pr) */ int_t *Lrb_valptr; /* pointers to L nzval[]; size ceil(NSUPERS/Pr) */ double *dense, *dense_col; /* SPA */ double zero = 0.0; int_t ldaspa; /* LDA of SPA */ int_t iword, dword; float mem_use = 0.0; #if ( PRNTlevel>=1 ) int_t nLblocks = 0, nUblocks = 0; #endif #if ( PROFlevel>=1 ) double t, t_u, t_l; int_t u_blks; #endif /* Initialization. */ iam = grid->iam; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0; nsupers = supno[n-1] + 1; Astore = A->Store; a = Astore->nzval; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; #if ( PRNTlevel>=1 ) iword = sizeof(int_t); dword = sizeof(double); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter ddistribute()"); #endif if ( fact == SamePattern_SameRowPerm ) { /* --------------------------------------------------------------- * REUSE THE L AND U DATA STRUCTURES FROM A PREVIOUS FACTORIZATION. * --------------------------------------------------------------- */ #if ( PROFlevel>=1 ) t_l = t_u = 0; u_blks = 0; #endif /* We can propagate the new values of A into the existing L and U data structures. 
*/ ilsum = Llu->ilsum; ldaspa = Llu->ldalsum; if ( !(dense = doubleCalloc_dist(((size_t)ldaspa) * sp_ienv_dist(3))) ) ABORT("Calloc fails for SPA dense[]."); nrbu = CEILING( nsupers, grid->nprow ); /* No. of local block rows */ if ( !(Urb_length = intCalloc_dist(nrbu)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(nrbu)) ) ABORT("Malloc fails for Urb_indptr[]."); Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; #if ( PRNTlevel>=1 ) mem_use += 2.0*nrbu*iword + ldaspa*sp_ienv_dist(3)*dword; #endif #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /* Initialize Uval to zero. */ for (lb = 0; lb < nrbu; ++lb) { Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ index = Ufstnz_br_ptr[lb]; if ( index ) { uval = Unzval_br_ptr[lb]; len = index[1]; for (i = 0; i < len; ++i) uval[i] = zero; } /* if index != NULL */ } /* for lb ... */ for (jb = 0; jb < nsupers; ++jb) { /* Loop through each block column */ pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Scatter A into SPA (for L), or into U directly. 
*/ for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) { for (i = xa_begin[j]; i < xa_end[j]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); if ( gb < jb ) { /* in U */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; while ( (k = index[Urb_indptr[lb]]) < jb ) { /* Skip nonzero values in this block */ Urb_length[lb] += index[Urb_indptr[lb]+1]; /* Move pointer to the next block */ Urb_indptr[lb] += UB_DESCRIPTOR + SuperSize( k ); } /*assert(k == jb);*/ /* start fstnz */ istart = Urb_indptr[lb] + UB_DESCRIPTOR; len = Urb_length[lb]; fsupc1 = FstBlockC( gb+1 ); k = j - fsupc; /* Sum the lengths of the leading columns */ for (jj = 0; jj < k; ++jj) len += fsupc1 - index[istart++]; /*assert(irow>=index[istart]);*/ uval[len + irow - index[istart]] = a[i]; } else { /* in L; put in SPA first */ irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } } /* for i ... */ dense_col += ldaspa; } /* for j ... */ #if ( PROFlevel>=1 ) t_u += SuperLU_timer_() - t; t = SuperLU_timer_(); #endif /* Gather the values of A from SPA into Lnzval[]. */ ljb = LBj( jb, grid ); /* Local block number */ index = Lrowind_bc_ptr[ljb]; if ( index ) { nrbl = index[0]; /* Number of row blocks. */ len = index[1]; /* LDA of lusup[]. */ lusup = Lnzval_bc_ptr[ljb]; next_lind = BC_HEADER; next_lval = 0; for (jj = 0; jj < nrbl; ++jj) { gb = index[next_lind++]; len1 = index[next_lind++]; /* Rows in the block. */ lb = LBi( gb, grid ); for (bnnz = 0; bnnz < len1; ++bnnz) { irow = index[next_lind++]; /* Global index. */ irow = ilsum[lb] + irow - FstBlockC( gb ); k = next_lval++; for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } /* for bnnz ... */ } /* for jj ... */ } /* if index ... */ #if ( PROFlevel>=1 ) t_l += SuperLU_timer_() - t; #endif } /* if mycol == pc */ } /* for jb ... 
*/ SUPERLU_FREE(dense); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); #if ( PROFlevel>=1 ) if ( !iam ) printf(".. 2nd distribute time: L %.2f\tU %.2f\tu_blks %d\tnrbu %d\n", t_l, t_u, u_blks, nrbu); #endif } else { /* -------------------------------------------------- * FIRST TIME CREATING THE L AND U DATA STRUCTURE. * -------------------------------------------------- */ #if ( PROFlevel>=1 ) t_l = t_u = 0; u_blks = 0; #endif /* No L and U data structures are available yet. We need to set up the L and U data structures and propagate the values of A into them. */ lsub = Glu_freeable->lsub; /* compressed L subscripts */ xlsub = Glu_freeable->xlsub; usub = Glu_freeable->usub; /* compressed U subscripts */ xusub = Glu_freeable->xusub; if ( !(ToRecv = SUPERLU_MALLOC(nsupers * sizeof(int))) ) ABORT("Malloc fails for ToRecv[]."); for (i = 0; i < nsupers; ++i) ToRecv[i] = 0; k = CEILING( nsupers, grid->npcol );/* Number of local column blocks */ if ( !(ToSendR = (int **) SUPERLU_MALLOC(k*sizeof(int*))) ) ABORT("Malloc fails for ToSendR[]."); j = k * grid->npcol; if ( !(index1 = SUPERLU_MALLOC(j * sizeof(int))) ) ABORT("Malloc fails for index[]."); #if ( PRNTlevel>=1 ) mem_use += (float) k*sizeof(int_t*) + (j + nsupers)*iword; #endif for (i = 0; i < j; ++i) index1[i] = EMPTY; for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index1[j]; k = CEILING( nsupers, grid->nprow ); /* Number of local block rows */ /* Pointers to the beginning of each block row of U. */ if ( !(Unzval_br_ptr = (double**)SUPERLU_MALLOC(k * sizeof(double*))) ) ABORT("Malloc fails for Unzval_br_ptr[]."); if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Ufstnz_br_ptr[]."); if ( !(ToSendD = SUPERLU_MALLOC(k * sizeof(int))) ) ABORT("Malloc fails for ToSendD[]."); for (i = 0; i < k; ++i) ToSendD[i] = NO; if ( !(ilsum = intMalloc_dist(k+1)) ) ABORT("Malloc fails for ilsum[]."); /* Auxiliary arrays used to set up U block data structures. 
They are freed on return. */ if ( !(rb_marker = intCalloc_dist(k)) ) ABORT("Calloc fails for rb_marker[]."); if ( !(Urb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Urb_indptr[]."); if ( !(Urb_fstnz = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_fstnz[]."); if ( !(Ucbs = intCalloc_dist(k)) ) ABORT("Calloc fails for Ucbs[]."); #if ( PRNTlevel>=1 ) mem_use += 2.0*k*sizeof(int_t*) + (7.0*k+1)*iword; #endif /* Compute ldaspa and ilsum[]. */ ldaspa = 0; ilsum[0] = 0; for (gb = 0; gb < nsupers; ++gb) { if ( myrow == PROW( gb, grid ) ) { i = SuperSize( gb ); ldaspa += i; lb = LBi( gb, grid ); ilsum[lb + 1] = ilsum[lb] + i; } } /* ------------------------------------------------------------ COUNT NUMBER OF ROW BLOCKS AND THE LENGTH OF EACH BLOCK IN U. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF G(U). ------------------------------------------------------------*/ /* Loop through each supernode column. */ for (jb = 0; jb < nsupers; ++jb) { pc = PCOL( jb, grid ); fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Loop through each column in the block. */ for (j = fsupc; j < fsupc + nsupc; ++j) { /* usub[*] contains only "first nonzero" in each segment. */ for (i = xusub[j]; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero of the segment. */ gb = BlockNum( irow ); kcol = PCOL( gb, grid ); ljb = LBj( gb, grid ); if ( mycol == kcol && mycol != pc ) ToSendR[ljb][pc] = YES; pr = PROW( gb, grid ); lb = LBi( gb, grid ); if ( mycol == pc ) { if ( myrow == pr ) { ToSendD[lb] = YES; /* Count nonzeros in entire block row. */ Urb_length[lb] += FstBlockC( gb+1 ) - irow; if (rb_marker[lb] <= jb) {/* First see the block */ rb_marker[lb] = jb + 1; Urb_fstnz[lb] += nsupc; ++Ucbs[lb]; /* Number of column blocks in block row lb. */ #if ( PRNTlevel>=1 ) ++nUblocks; #endif } ToRecv[gb] = 1; } else ToRecv[gb] = 2; /* Do I need 0, 1, 2 ? */ } } /* for i ... */ } /* for j ... */ } /* for jb ... 
*/ /* Set up the initial pointers for each block row in U. */ nrbu = CEILING( nsupers, grid->nprow );/* Number of local block rows */ for (lb = 0; lb < nrbu; ++lb) { len = Urb_length[lb]; rb_marker[lb] = 0; /* Reset block marker. */ if ( len ) { /* Add room for descriptors */ len1 = Urb_fstnz[lb] + BR_HEADER + Ucbs[lb] * UB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1+1)) ) ABORT("Malloc fails for Uindex[]."); Ufstnz_br_ptr[lb] = index; if ( !(Unzval_br_ptr[lb] = doubleMalloc_dist(len)) ) ABORT("Malloc fails for Unzval_br_ptr[*][]."); mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 ); mybufmax[3] = SUPERLU_MAX( mybufmax[3], len ); index[0] = Ucbs[lb]; /* Number of column blocks */ index[1] = len; /* Total length of nzval[] */ index[2] = len1; /* Total length of index[] */ index[len1] = -1; /* End marker */ } else { Ufstnz_br_ptr[lb] = NULL; Unzval_br_ptr[lb] = NULL; } Urb_length[lb] = 0; /* Reset block length. */ Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ Urb_fstnz[lb] = BR_HEADER; } /* for lb ... */ SUPERLU_FREE(Ucbs); #if ( PROFlevel>=1 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Phase 2 - setup U strut time: %.2f\t\n", t); #endif #if ( PRNTlevel>=1 ) mem_use -= 2.0*k * iword; #endif /* Auxiliary arrays used to set up L block data structures. They are freed on return. k is the number of local row blocks. */ if ( !(Lrb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Lrb_length[]."); if ( !(Lrb_number = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_number[]."); if ( !(Lrb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_indptr[]."); if ( !(Lrb_valptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_valptr[]."); if (!(dense=doubleCalloc_dist(SUPERLU_MAX(1,((size_t)ldaspa) *sp_ienv_dist(3))))) ABORT("Calloc fails for SPA dense[]."); /* These counts will be used for triangular solves. 
*/ if ( !(fmod = intCalloc_dist(k)) ) ABORT("Calloc fails for fmod[]."); if ( !(bmod = intCalloc_dist(k)) ) ABORT("Calloc fails for bmod[]."); #if ( PRNTlevel>=1 ) mem_use += 6.0*k*iword + ldaspa*sp_ienv_dist(3)*dword; #endif k = CEILING( nsupers, grid->npcol );/* Number of local block columns */ /* Pointers to the beginning of each block column of L. */ if ( !(Lnzval_bc_ptr = (double**)SUPERLU_MALLOC(k * sizeof(double*))) ) ABORT("Malloc fails for Lnzval_bc_ptr[]."); if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Lrowind_bc_ptr[]."); Lrowind_bc_ptr[k-1] = NULL; /* These lists of processes will be used for triangular solves. */ if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for fsendx_plist[]."); len = k * grid->nprow; if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for fsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) fsendx_plist[i] = &index[j]; if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for bsendx_plist[]."); if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for bsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) bsendx_plist[i] = &index[j]; #if ( PRNTlevel>=1 ) mem_use += 4.0*k*sizeof(int_t*) + 2.0*len*iword; #endif /*------------------------------------------------------------ PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U. ------------------------------------------------------------*/ for (jb = 0; jb < nsupers; ++jb) { pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); ljb = LBj( jb, grid ); /* Local block number */ /* Scatter A into SPA. 
*/ for (j = fsupc, dense_col = dense; j < FstBlockC( jb+1 ); ++j){ for (i = xa_begin[j]; i < xa_end[j]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } dense_col += ldaspa; } jbrow = PROW( jb, grid ); #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /*------------------------------------------------ * SET UP U BLOCKS. *------------------------------------------------*/ kseen = 0; dense_col = dense; /* Loop through each column in the block column. */ for (j = fsupc; j < FstBlockC( jb+1 ); ++j) { istart = xusub[j]; /* NOTE: Only the first nonzero index of the segment is stored in usub[]. */ for (i = istart; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero in the segment. */ gb = BlockNum( irow ); pr = PROW( gb, grid ); if ( pr != jbrow && myrow == jbrow && /* diag. proc. owning jb */ bsendx_plist[ljb][pr] == EMPTY ) { bsendx_plist[ljb][pr] = YES; ++nbsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; fsupc1 = FstBlockC( gb+1 ); if (rb_marker[lb] <= jb) { /* First time see the block */ rb_marker[lb] = jb + 1; Urb_indptr[lb] = Urb_fstnz[lb];; index[Urb_indptr[lb]] = jb; /* Descriptor */ Urb_indptr[lb] += UB_DESCRIPTOR; /* Record the first location in index[] of the next block */ Urb_fstnz[lb] = Urb_indptr[lb] + nsupc; len = Urb_indptr[lb];/* Start fstnz in index */ index[len-1] = 0; for (k = 0; k < nsupc; ++k) index[len+k] = fsupc1; if ( gb != jb )/* Exclude diagonal block. */ ++bmod[lb];/* Mod. count for back solve */ if ( kseen == 0 && myrow != jbrow ) { ++nbrecvx; kseen = 1; } } else { /* Already saw the block */ len = Urb_indptr[lb];/* Start fstnz in index */ } jj = j - fsupc; index[len+jj] = irow; /* Load the numerical values */ k = fsupc1 - irow; /* No. 
of nonzeros in segment */ index[len-1] += k; /* Increment block length in Descriptor */ irow = ilsum[lb] + irow - FstBlockC( gb ); for (ii = 0; ii < k; ++ii) { uval[Urb_length[lb]++] = dense_col[irow + ii]; dense_col[irow + ii] = zero; } } /* if myrow == pr ... */ } /* for i ... */ dense_col += ldaspa; } /* for j ... */ #if ( PROFlevel>=1 ) t_u += SuperLU_timer_() - t; t = SuperLU_timer_(); #endif /*------------------------------------------------ * SET UP L BLOCKS. *------------------------------------------------*/ /* Count number of blocks and length of each block. */ nrbl = 0; len = 0; /* Number of row subscripts I own. */ kseen = 0; istart = xlsub[fsupc]; for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); /* Global block number */ pr = PROW( gb, grid ); /* Process row owning this block */ if ( pr != jbrow && myrow == jbrow && /* diag. proc. owning jb */ fsendx_plist[ljb][pr] == EMPTY /* first time */ ) { fsendx_plist[ljb][pr] = YES; ++nfsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ if (rb_marker[lb] <= jb) { /* First see this block */ rb_marker[lb] = jb + 1; Lrb_length[lb] = 1; Lrb_number[nrbl++] = gb; if ( gb != jb ) /* Exclude diagonal block. */ ++fmod[lb]; /* Mod. count for forward solve */ if ( kseen == 0 && myrow != jbrow ) { ++nfrecvx; kseen = 1; } #if ( PRNTlevel>=1 ) ++nLblocks; #endif } else { ++Lrb_length[lb]; } ++len; } } /* for i ... */ if ( nrbl ) { /* Do not ensure the blocks are sorted! */ /* Set up the initial pointers for each block in index[] and nzval[]. 
*/ /* Add room for descriptors */ len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1)) ) ABORT("Malloc fails for index[]"); Lrowind_bc_ptr[ljb] = index; if (!(Lnzval_bc_ptr[ljb] = doubleMalloc_dist(((size_t)len)*nsupc))) { fprintf(stderr, "col block " IFMT " ", jb); ABORT("Malloc fails for Lnzval_bc_ptr[*][]"); } mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 ); mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc ); mybufmax[4] = SUPERLU_MAX( mybufmax[4], len ); index[0] = nrbl; /* Number of row blocks */ index[1] = len; /* LDA of the nzval[] */ next_lind = BC_HEADER; next_lval = 0; for (k = 0; k < nrbl; ++k) { gb = Lrb_number[k]; lb = LBi( gb, grid ); len = Lrb_length[lb]; Lrb_length[lb] = 0; /* Reset vector of block length */ index[next_lind++] = gb; /* Descriptor */ index[next_lind++] = len; Lrb_indptr[lb] = next_lind; Lrb_valptr[lb] = next_lval; next_lind += len; next_lval += len; } /* Propagate the compressed row subscripts to Lindex[], and the initial values of A from SPA into Lnzval[]. */ lusup = Lnzval_bc_ptr[ljb]; len = index[1]; /* LDA of lusup[] */ for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); k = Lrb_indptr[lb]++; /* Random access a block */ index[k] = irow; k = Lrb_valptr[lb]++; irow = ilsum[lb] + irow - FstBlockC( gb ); for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = 0.0; k += len; dense_col += ldaspa; } } } /* for i ... */ } else { Lrowind_bc_ptr[ljb] = NULL; Lnzval_bc_ptr[ljb] = NULL; } /* if nrbl ... */ #if ( PROFlevel>=1 ) t_l += SuperLU_timer_() - t; #endif } /* if mycol == pc */ } /* for jb ... 
*/ Llu->Lrowind_bc_ptr = Lrowind_bc_ptr; Llu->Lnzval_bc_ptr = Lnzval_bc_ptr; Llu->Ufstnz_br_ptr = Ufstnz_br_ptr; Llu->Unzval_br_ptr = Unzval_br_ptr; Llu->ToRecv = ToRecv; Llu->ToSendD = ToSendD; Llu->ToSendR = ToSendR; Llu->fmod = fmod; Llu->fsendx_plist = fsendx_plist; Llu->nfrecvx = nfrecvx; Llu->nfsendx = nfsendx; Llu->bmod = bmod; Llu->bsendx_plist = bsendx_plist; Llu->nbrecvx = nbrecvx; Llu->nbsendx = nbsendx; Llu->ilsum = ilsum; Llu->ldalsum = ldaspa; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. # L blocks " IFMT "\t# U blocks " IFMT "\n", nLblocks, nUblocks); #endif SUPERLU_FREE(rb_marker); SUPERLU_FREE(Urb_fstnz); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); SUPERLU_FREE(Lrb_length); SUPERLU_FREE(Lrb_number); SUPERLU_FREE(Lrb_indptr); SUPERLU_FREE(Lrb_valptr); SUPERLU_FREE(dense); k = CEILING( nsupers, grid->nprow );/* Number of local block rows */ if ( !(Llu->mod_bit = intMalloc_dist(k)) ) ABORT("Malloc fails for mod_bit[]."); /* Find the maximum buffer size. */ MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, MPI_MAX, grid->comm); #if ( PROFlevel>=1 ) if ( !iam ) printf(".. 1st distribute time:\n " "\tL\t%.2f\n\tU\t%.2f\n" "\tu_blks %d\tnrbu %d\n--------\n", t_l, t_u, u_blks, nrbu); #endif } /* else fact != SamePattern_SameRowPerm */ #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ilsum, fmod, fsendx_plist, bmod, bsendx_plist */ CHECK_MALLOC(iam, "Exit ddistribute()"); #endif return (mem_use); } /* DDISTRIBUTE */ SuperLU_DIST_5.3.0/SRC/pzGetDiagU.c0000644013363400111340000000676513233431301015450 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file p@(pre)GetDiagU.c * \brief Extracts the main diagonal of matrix U * *
 * -- Auxiliary routine in distributed SuperLU (version 5.1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * Xiaoye S. Li
 * Created:  April 16, 2002
 * Modified: May 15, 2016
 * 
*/ #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * GetDiagU extracts the main diagonal of matrix U of the LU factorization.
 *  
 * Arguments
 * =========
 *
 * n        (input) int_t
 *          Dimension of the matrix.
 *
 * LUstruct (input) LUstruct_t*
 *          The data structures to store the distributed L and U factors.
 *          see superlu_zdefs.h for its definition.
 *
 * grid     (input) gridinfo_t*
 *          The 2D process mesh. It contains the MPI communicator, the number
 *          of process rows (NPROW), the number of process columns (NPCOL),
 *          and my process rank. It is an input argument to all the
 *          parallel routines.
 *
 * diagU    (output) doublecomplex*, dimension (n)
 *          The main diagonal of matrix U.
 *          On exit, it is available on all processes.
 *
 *
 * Note
 * ====
 *
 * The diagonal blocks of the L and U matrices are stored in the L
 * data structures, and are on the diagonal processes of the
 * 2D process grid.
 *
 * This routine is modified from gather_diag_to_all() in pzgstrs_Bglobal.c.
 * 
*/ void pzGetDiagU(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, doublecomplex *diagU) { int_t *xsup; int iam, knsupc, pkk; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int_t i, j, jj, k, lk, lwork, nsupers, p; int_t num_diag_procs, *diag_procs, *diag_len; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; doublecomplex *zblock, *zwork, *lusup; iam = grid->iam; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX( jj, diag_len[j] ); if ( !(zwork = doublecomplexMalloc_dist(jj)) ) ABORT("Malloc fails for zwork[]"); for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { /* Copy diagonal into buffer dwork[]. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBj( k, grid ); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; for (i = 0; i < knsupc; ++i) /* Copy the diagonal. */ zwork[lwork+i] = lusup[i*(nsupr+1)]; lwork += knsupc; } MPI_Bcast( zwork, lwork, SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm ); } else { MPI_Bcast( zwork, diag_len[p], SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm ); } /* Scatter zwork[] into global diagU vector. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); zblock = &diagU[FstBlockC( k )]; for (i = 0; i < knsupc; ++i) zblock[i] = zwork[lwork+i]; lwork += knsupc; } } /* for p = ... */ SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); SUPERLU_FREE(zwork); } SuperLU_DIST_5.3.0/SRC/superlu_ddefs.h0000644013363400111340000004076013233431301016307 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Distributed SuperLU data types and function prototypes * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * November 1, 2007
 * April 5, 2015
 * 
*/

#ifndef __SUPERLU_dDEFS /* allow multiple inclusions */
#define __SUPERLU_dDEFS

/*
 * File name:	superlu_ddefs.h
 * Purpose:     Distributed SuperLU data types and function prototypes
 *              (double-precision real variants).
 * History:
 */

#include "superlu_defs.h"

/*-- Auxiliary data type used in PxGSTRS/PxGSTRS1. */
typedef struct {
    int_t lbnum;  /* Row block number (local).      */
    int_t indpos; /* Starting position in Uindex[]. */
} Ucb_indptr_t;

/*
 * On each processor, the blocks in L are stored in compressed block
 * column format, the blocks in U are stored in compressed block row format.
 */
#define MAX_LOOKAHEADS 50 /* Number of entries in each of the *_buf_2[] arrays below. */

/*
 * Per-process storage for the local L and U blocks, together with the
 * communication schedules and counters used by the factorization and by
 * the forward/back triangular solves.
 */
typedef struct {
    int_t   **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc)                 */
    double  **Lnzval_bc_ptr;  /* size ceil(NSUPERS/Pc)                 */
    int_t   **Ufstnz_br_ptr;  /* size ceil(NSUPERS/Pr)                 */
    double  **Unzval_br_ptr;  /* size ceil(NSUPERS/Pr)                 */
#if 0
    int_t   *Lsub_buf;        /* Buffer for the remote subscripts of L */
    double  *Lval_buf;        /* Buffer for the remote nonzeros of L   */
    int_t   *Usub_buf;        /* Buffer for the remote subscripts of U */
    double  *Uval_buf;        /* Buffer for the remote nonzeros of U   */
#endif
    int_t   *Lsub_buf_2[MAX_LOOKAHEADS];  /* Buffers for the remote subscripts of L */
    double  *Lval_buf_2[MAX_LOOKAHEADS];  /* Buffers for the remote nonzeros of L   */
    int_t   *Usub_buf_2[MAX_LOOKAHEADS];  /* Buffers for the remote subscripts of U */
    double  *Uval_buf_2[MAX_LOOKAHEADS];  /* Buffers for the remote nonzeros of U   */
    double  *ujrow;           /* used in panel factorization.          */
    int_t   bufmax[NBUFFERS]; /* Maximum buffer size across all MPI ranks:
			       *  0 : maximum size of Lsub_buf[]
			       *  1 : maximum size of Lval_buf[]
			       *  2 : maximum size of Usub_buf[]
			       *  3 : maximum size of Uval_buf[]
			       *  4 : maximum size of tempv[LDA]
			       */

    /*-- Record communication schedule for factorization. --*/
    int   *ToRecv;            /* Recv from no one (0), left (1), and up (2).*/
    int   *ToSendD;           /* Whether need to send down block row.       */
    int   **ToSendR;          /* List of processes to send right block col. */

    /*-- Record communication schedule for forward/back solves. --*/
    int_t   *fmod;            /* Modification count for L-solve            */
    int_t   **fsendx_plist;   /* Column process list to send down Xk       */
    int_t   *frecv;           /* Modifications to be received in proc row  */
    int_t   nfrecvx;          /* Number of Xk I will receive in L-solve    */
    int_t   nfsendx;          /* Number of Xk I will send in L-solve       */
    int_t   *bmod;            /* Modification count for U-solve            */
    int_t   **bsendx_plist;   /* Column process list to send down Xk       */
    int_t   *brecv;           /* Modifications to be received in proc row  */
    int_t   nbrecvx;          /* Number of Xk I will receive in U-solve    */
    int_t   nbsendx;          /* Number of Xk I will send in U-solve       */
    int_t   *mod_bit;         /* Flags the contribution from each row block */

    /*-- Auxiliary arrays used for forward/back solves. --*/
    int_t   *ilsum;           /* Starting position of each supernode in lsum
				 (local)  */
    int_t   ldalsum;          /* LDA of lsum (local) */
    int_t   SolveMsgSent;     /* Number of actual messages sent in LU-solve */
    int_t   SolveMsgVol;      /* Volume of messages sent in the solve phase */


    /*********************/
    /* The following variables are used in the hybrid solver */

    /*-- Counts to be used in U^{-T} triangular solve. -- */
    int_t UT_SOLVE;
    int_t L_SOLVE;
    int_t FRECV;
    int_t ut_ldalsum;        /* LDA of lsum (local) */
    int_t *ut_ilsum;         /* ilsum, column-wise */
    int_t *utmod;            /* Modification count for Ut-solve.        */
    int_t **ut_sendx_plist;  /* Row process list to send down Xk        */
    int_t *utrecv;           /* Modifications to be received in proc column. */
    int_t n_utsendx;         /* Number of Xk I will send.
				NOTE(review): this comment previously read
				"receive", which contradicts the field name and
				the nfsendx/nfrecvx pair above -- confirm. */
    int_t n_utrecvx;         /* Number of Xk I will receive.
				NOTE(review): previously read "send" -- confirm. */
    int_t n_utrecvmod;
    int_t nroot;
    int_t *ut_modbit;
    int_t *Urbs;
    Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
    int_t  **Ucb_valptr;      /* Vertical linked list pointing to Unzval[] */

    /* some additional counters for L solve */
    int_t n;
    int_t nleaf;
    int_t nfrecvmod;
} LocalLU_t;


/* Groups the elimination tree with the persistent global structure and
   the per-process L/U storage. */
typedef struct {
    int_t *etree;
    Glu_persist_t *Glu_persist;
    LocalLU_t *Llu;
} LUstruct_t;


/*-- Data structure for communication during matrix-vector multiplication. */
typedef struct {
    int_t *extern_start;
    int_t *ind_tosend;    /* X indices to be sent to other processes */
    int_t *ind_torecv;    /* X indices to be received from other processes */
    int_t *ptr_ind_tosend;/* Pointers to ind_tosend[] (Size procs)
			     (also point to val_torecv) */
    int_t *ptr_ind_torecv;/* Pointers to ind_torecv[] (Size procs)
			     (also point to val_tosend) */
    int   *SendCounts;    /* Numbers of X indices to be sent
			     (also numbers of X values to be received) */
    int   *RecvCounts;    /* Numbers of X indices to be received
			     (also numbers of X values to be sent) */
    double *val_tosend;   /* X values to be sent to other processes */
    double *val_torecv;   /* X values to be received from other processes */
    int_t TotalIndSend;   /* Total number of indices to be sent
			     (also total number of values to be received) */
    int_t TotalValSend;   /* Total number of values to be sent.
			     (also total number of indices to be received) */
} pdgsmv_comm_t;

/*-- Data structure holding the information for the solution phase --*/
typedef struct {
    int_t *row_to_proc;
    int_t *inv_perm_c;
    int_t num_diag_procs, *diag_procs, *diag_len;
    pdgsmv_comm_t *gsmv_comm; /* communication metadata for SpMV,
         	       		      required by IterRefine.          */
    pxgstrs_comm_t *gstrs_comm;  /* communication metadata for SpTRSV. */
    int_t *A_colind_gsmv; /* After pdgsmv_init(), the global column
                             indices of A are translated into the relative
                             positions in the gathered x-vector.
                             This is re-used in repeated calls to pdgsmv() */
    int_t *xrow_to_proc; /* used by PDSLin */
} SOLVEstruct_t;


/***********************************************************************
 * Function prototypes
 ***********************************************************************/

#ifdef __cplusplus
extern "C" {
#endif


/* Supernodal LU factor related */
extern void
dCreate_CompCol_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, double *,
			    int_t *, int_t *, Stype_t, Dtype_t, Mtype_t);
extern void
dCreate_CompRowLoc_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, int_t,
			       int_t, double *, int_t *, int_t *,
			       Stype_t, Dtype_t, Mtype_t);
extern void
dCompRow_to_CompCol_dist(int_t, int_t, int_t, double *, int_t *, int_t *,
                         double **, int_t **, int_t **);
extern int
pdCompRow_loc_to_CompCol_global(int_t, SuperMatrix *, gridinfo_t *,
	 		        SuperMatrix *);
extern void
dCopy_CompCol_Matrix_dist(SuperMatrix *, SuperMatrix *);
extern void
dCreate_Dense_Matrix_dist(SuperMatrix *, int_t, int_t, double *, int_t,
			  Stype_t, Dtype_t, Mtype_t);
extern void
dCreate_SuperNode_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, double *,
			      int_t *, int_t *, int_t *, int_t *, int_t *,
			      Stype_t, Dtype_t, Mtype_t);
extern void
dCopy_Dense_Matrix_dist(int_t, int_t, double *, int_t,
                        double *, int_t);

extern void    dallocateA_dist (int_t, int_t, double **, int_t **, int_t **);
extern void    dGenXtrue_dist (int_t, int_t, double *, int_t);
extern void    dFillRHS_dist (char *, int_t, double *, int_t,
                              SuperMatrix *, double *, int_t);
extern int     dcreate_matrix(SuperMatrix *, int, double **, int *,
			      double **, int *, FILE *, gridinfo_t *);
extern int     dcreate_matrix_rb(SuperMatrix *, int, double **, int *,
			      double **, int *, FILE *, gridinfo_t *);
extern int     dcreate_matrix_dat(SuperMatrix *, int, double **, int *,
			      double **, int *, FILE *, gridinfo_t *);

/* Driver related */
extern void    dgsequ_dist (SuperMatrix *, double *, double *, double *,
			    double *, double *, int_t *);
extern double  dlangs_dist (char *, SuperMatrix *);
extern void    dlaqgs_dist (SuperMatrix *, double *, double *, double,
			    double, double, char *);
extern void    pdgsequ (SuperMatrix *, double *, double *, double *,
			double *, double *, int_t *, gridinfo_t *);
extern double  pdlangs (char *, SuperMatrix *, gridinfo_t *);
extern void    pdlaqgs (SuperMatrix *, double *, double *, double,
			double, double, char *);
extern int     pdPermute_Dense_Matrix(int_t, int_t, int_t [], int_t[],
				      double [], int, double [], int, int,
				      gridinfo_t *);

extern int     sp_dtrsv_dist (char *, char *, char *, SuperMatrix *,
			      SuperMatrix *, double *, int *);
extern int     sp_dgemv_dist (char *, double, SuperMatrix *, double *,
			      int, double, double *, int);
extern int     sp_dgemm_dist (char *, int, double, SuperMatrix *,
                        double *, int, double, double *, int);

extern float ddistribute(fact_t, int_t, SuperMatrix *, Glu_freeable_t *,
			 LUstruct_t *, gridinfo_t *);
extern void  pdgssvx_ABglobal(superlu_dist_options_t *, SuperMatrix *,
			      ScalePermstruct_t *, double *,
			      int, int, gridinfo_t *, LUstruct_t *, double *,
			      SuperLUStat_t *, int *);
extern float pddistribute(fact_t, int_t, SuperMatrix *,
			 ScalePermstruct_t *, Glu_freeable_t *,
			 LUstruct_t *, gridinfo_t *);
extern void  pdgssvx(superlu_dist_options_t *, SuperMatrix *,
		     ScalePermstruct_t *, double *,
		     int, int, gridinfo_t *, LUstruct_t *,
		     SOLVEstruct_t *, double *, SuperLUStat_t *, int *);
extern int  dSolveInit(superlu_dist_options_t *, SuperMatrix *, int_t [], int_t [],
		       int_t, LUstruct_t *, gridinfo_t *, SOLVEstruct_t *);
extern void dSolveFinalize(superlu_dist_options_t *, SOLVEstruct_t *);
extern int_t pxgstrs_init(int_t, int_t, int_t, int_t,
	                  int_t [], int_t [], gridinfo_t *grid,
	                  Glu_persist_t *, SOLVEstruct_t *);
extern void pxgstrs_finalize(pxgstrs_comm_t *);
extern int  dldperm_dist(int_t, int_t, int_t, int_t [], int_t [],
		    double [], int_t *, double [], double []);
extern int  static_schedule(superlu_dist_options_t *, int, int,
		            LUstruct_t *, gridinfo_t *, SuperLUStat_t *,
			    int_t *, int_t *, int *);
extern void LUstructInit(const int_t, LUstruct_t *);
extern void LUstructFree(LUstruct_t *);
extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *);

/* #define GPU_PROF
#define IPM_PROF */

extern int_t pdgstrf(superlu_dist_options_t *, int, int, double,
		    LUstruct_t*, gridinfo_t*, SuperLUStat_t*, int*);
extern void pdgstrs_Bglobal(int_t, LUstruct_t *, gridinfo_t *,
			     double *, int_t, int, SuperLUStat_t *, int *);
extern void pdgstrs(int_t, LUstruct_t *, ScalePermstruct_t *, gridinfo_t *,
		    double *, int_t, int_t, int_t, int, SOLVEstruct_t *,
		    SuperLUStat_t *, int *);
extern void dlsum_fmod(double *, double *, double *, double *,
		       int, int, int_t , int_t *, int_t, int_t, int_t,
		       int_t *, gridinfo_t *, LocalLU_t *,
		       MPI_Request [], SuperLUStat_t *);
extern void dlsum_bmod(double *, double *, double *,
                       int, int_t, int_t *, int_t *, Ucb_indptr_t **,
                       int_t **, int_t *, gridinfo_t *, LocalLU_t *,
		       MPI_Request [], SuperLUStat_t *);
extern void pdgsrfs(int_t, SuperMatrix *, double, LUstruct_t *,
		    ScalePermstruct_t *, gridinfo_t *,
		    double [], int_t, double [], int_t, int,
		    SOLVEstruct_t *, double *, SuperLUStat_t *, int *);
extern void pdgsrfs_ABXglobal(int_t, SuperMatrix *, double, LUstruct_t *,
		  gridinfo_t *, double *, int_t, double *, int_t,
		  int, double *, SuperLUStat_t *, int *);
extern int   pdgsmv_AXglobal_setup(SuperMatrix *, Glu_persist_t *,
				   gridinfo_t *, int_t *, int_t *[],
				   double *[], int_t *[], int_t []);
extern int  pdgsmv_AXglobal(int_t, int_t [], double [], int_t [],
	                       double [], double []);
extern int  pdgsmv_AXglobal_abs(int_t, int_t [], double [], int_t [],
				 double [], double []);
extern void pdgsmv_init(SuperMatrix *, int_t *, gridinfo_t *,
			pdgsmv_comm_t *);
extern void pdgsmv(int_t, SuperMatrix *, gridinfo_t *, pdgsmv_comm_t *,
		   double x[], double ax[]);
extern void pdgsmv_finalize(pdgsmv_comm_t *);

/* Memory-related */
extern double  *doubleMalloc_dist(int_t);
extern double  *doubleCalloc_dist(int_t);
extern void  *duser_malloc_dist (int_t, int_t);
extern void  duser_free_dist (int_t, int_t);
extern int_t dQuerySpace_dist(int_t, LUstruct_t *, gridinfo_t *,
			      SuperLUStat_t *, superlu_dist_mem_usage_t *);

/* Auxiliary routines */
extern void    dfill_dist (double *, int_t, double);
extern void    dinf_norm_error_dist (int_t, int_t, double*, int_t,
                                     double*, int_t, gridinfo_t*);
extern void    pdinf_norm_error(int, int_t, int_t, double [], int_t,
				double [], int_t , gridinfo_t *);
extern void  dreadhb_dist (int, FILE *, int_t *, int_t *, int_t *,
			   double **, int_t **, int_t **);
extern void  dreadtriple_dist(FILE *, int_t *, int_t *, int_t *,
			 double **, int_t **, int_t **);
extern void  dreadrb_dist(int, FILE *, int_t *, int_t *, int_t *,
		     double **, int_t **, int_t **);
extern void  dreadMM_dist(FILE *, int_t *, int_t *, int_t *,
	                  double **, int_t **, int_t **);

/* Distribute the data for numerical factorization */
extern float ddist_psymbtonum(fact_t, int_t, SuperMatrix *,
                                ScalePermstruct_t *, Pslu_freeable_t *,
                                LUstruct_t *, gridinfo_t *);
extern void pdGetDiagU(int_t, LUstruct_t *, gridinfo_t *, double *);


/* Routines for debugging */
extern void  dPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *,
		 	   LocalLU_t *);
extern void  dPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *,
			   LocalLU_t *);
extern void  dPrint_CompCol_Matrix_dist(SuperMatrix *);
extern void  dPrint_Dense_Matrix_dist(SuperMatrix *);
extern int   dPrint_CompRowLoc_Matrix_dist(SuperMatrix *);
extern int   file_PrintDouble5(FILE *, char *, int_t, double *);


/* BLAS */
/* With USE_VENDOR_BLAS the prototypes carry extra trailing int arguments;
   presumably the hidden Fortran string lengths -- TODO confirm. */

#ifdef USE_VENDOR_BLAS
extern void dgemm_(const char*, const char*, const int*, const int*, const int*,
                  const double*, const double*, const int*, const double*,
                  const int*, const double*, double*, const int*, int, int);
extern void dtrsv_(char*, char*, char*, int*, double*, int*,
                  double*, int*, int, int, int);
extern void dtrsm_(char*, char*, char*, char*, int*, int*,
                  double*, double*, int*, double*,
                  int*, int, int, int, int);
extern void dgemv_(char *, int *, int *, double *, double *a, int *,
                  double *, int *, double *, double *, int *, int);
extern void dger_(int*, int*, double*, double*, int*,
                 double*, int*, double*, int*);

#else
extern int dgemm_(const char*, const char*, const int*, const int*, const int*,
                   const double*,  const double*,  const int*,  const double*,
                   const int*,  const double*, double*, const int*);
extern int dtrsv_(char*, char*, char*, int*, double*, int*,
                  double*, int*);
extern int dtrsm_(char*, char*, char*, char*, int*, int*,
                  double*, double*, int*, double*, int*);
extern int dgemv_(char *, int *, int *, double *, double *a, int *,
                  double *, int *, double *, double *, int *);
extern void dger_(int*, int*, double*, double*, int*,
                 double*, int*, double*, int*);

#endif


#ifdef __cplusplus
  }
#endif

#endif /* __SUPERLU_dDEFS */
/* NOTE(review): the next line is a tar archive member header that is embedded
   in this text, not C source; it marks where zgsequ_dist.c begins. */
SuperLU_DIST_5.3.0/SRC/zgsequ_dist.c0000644013363400111340000001322613233431301015774 0ustar xiaoyessg
/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/

/*! @file
 * \brief Computes row and column scalings
 */

/*
 * File name:	zgsequ.c
 * History:     Modified from LAPACK routine ZGEEQU
 */
/* NOTE(review): the header name of the following #include is missing in this
   text; it appears to have been stripped during extraction. */
#include
#include "superlu_zdefs.h"

/*! \brief
   
    Purpose   
    =======   

    ZGSEQU_DIST computes row and column scalings intended to equilibrate an   
    M-by-N sparse matrix A and reduce its condition number. R returns the row
    scale factors and C the column scale factors, chosen to try to make   
    the largest element in each row and column of the matrix B with   
    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   

    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
    number and BIGNUM = largest safe number.  Use of these scaling   
    factors is not guaranteed to reduce the condition number of A but   
    works well in practice.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.
 
    Arguments   
    =========   

    A       (input) SuperMatrix*
            The matrix of dimension (A->nrow, A->ncol) whose equilibration
            factors are to be computed. The type of A can be:
            Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE.
	    
    R       (output) double*, size A->nrow
            If INFO = 0 or INFO > M, R contains the row scale factors   
            for A.
	    
    C       (output) double*, size A->ncol
            If INFO = 0,  C contains the column scale factors for A.
	    
    ROWCND  (output) double*
            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
            AMAX is neither too large nor too small, it is not worth   
            scaling by R.
	    
    COLCND  (output) double*
            If INFO = 0, COLCND contains the ratio of the smallest   
            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
            worth scaling by C.
	    
    AMAX    (output) double*
            Absolute value of largest matrix element.  If AMAX is very   
            close to overflow or very close to underflow, the matrix   
            should be scaled.
	    
    INFO    (output) int*
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i,  and i is   
                  <= M:  the i-th row of A is exactly zero   
                  >  M:  the (i-M)-th column of A is exactly zero   

    ===================================================================== 
*/
/*
 * Implementation notes:
 *  - Row factors are computed first as 1 / max_j |A(i,j)| (clamped to
 *    [smlnum, bignum]); column factors are then computed from the
 *    row-scaled matrix.
 *  - slud_z_abs1() supplies the magnitude used for each complex entry.
 *  - On the early returns for a zero row or zero column, the outputs that
 *    have not been computed yet (see INFO above) are left untouched.
 *  - Loop indices are int_t because they walk colptr[]/rowind[] and are
 *    stored into the int_t *info; they were previously plain int, which
 *    could truncate if int_t is configured wider than int.
 */
void
zgsequ_dist(SuperMatrix *A, double *r, double *c, double *rowcnd,
	    double *colcnd, double *amax, int_t *info)
{
    /* Local variables */
    NCformat      *Astore;
    doublecomplex *Aval;
    int_t  i, j, irow;   /* nonzero / column / row indices */
    int    iarg;         /* argument position reported to xerr_dist() */
    double rcmin, rcmax;
    double bignum, smlnum;

    /* Test the input parameters. */
    *info = 0;
    if ( A->nrow < 0 || A->ncol < 0 ||
	 A->Stype != SLU_NC || A->Dtype != SLU_Z || A->Mtype != SLU_GE )
	*info = -1;
    if (*info != 0) {
	iarg = -(*info);
	xerr_dist("zgsequ_dist", &iarg);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || A->ncol == 0 ) {
	*rowcnd = 1.;
	*colcnd = 1.;
	*amax = 0.;
	return;
    }

    Astore = (NCformat *) A->Store;
    Aval = (doublecomplex *) Astore->nzval;

    /* Get machine constants. */
    smlnum = dmach_dist("S");
    bignum = 1. / smlnum;

    /* Compute row scale factors. */
    for (i = 0; i < A->nrow; ++i) r[i] = 0.;

    /* Find the maximum element in each row. */
    for (j = 0; j < A->ncol; ++j)
	for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
	    irow = Astore->rowind[i];
	    r[irow] = SUPERLU_MAX( r[irow], slud_z_abs1(&Aval[i]) );
	}

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (i = 0; i < A->nrow; ++i) {
	rcmax = SUPERLU_MAX(rcmax, r[i]);
	rcmin = SUPERLU_MIN(rcmin, r[i]);
    }
    *amax = rcmax;

    if (rcmin == 0.) {
	/* Find the first zero scale factor and return an error code. */
	for (i = 0; i < A->nrow; ++i)
	    if (r[i] == 0.) {
		*info = i + 1;
		return;
	    }
    } else {
	/* Invert the scale factors. */
	for (i = 0; i < A->nrow; ++i)
	    r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
	/* Compute ROWCND = min(R(I)) / max(R(I)) */
	*rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    /* Compute column scale factors */
    for (j = 0; j < A->ncol; ++j) c[j] = 0.;

    /* Find the maximum element in each column, assuming the row
       scalings computed above. */
    for (j = 0; j < A->ncol; ++j)
	for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
	    irow = Astore->rowind[i];
	    c[j] = SUPERLU_MAX( c[j], slud_z_abs1(&Aval[i]) * r[irow] );
	}

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (j = 0; j < A->ncol; ++j) {
	rcmax = SUPERLU_MAX(rcmax, c[j]);
	rcmin = SUPERLU_MIN(rcmin, c[j]);
    }

    if (rcmin == 0.) {
	/* Find the first zero scale factor and return an error code. */
	for (j = 0; j < A->ncol; ++j)
	    if ( c[j] == 0. ) {
		*info = A->nrow + j + 1;
		return;
	    }
    } else {
	/* Invert the scale factors. */
	for (j = 0; j < A->ncol; ++j)
	    c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
	/* Compute COLCND = min(C(J)) / max(C(J)) */
	*colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    return;

} /* zgsequ_dist */
SuperLU_DIST_5.3.0/SRC/pzdistribute.c0000644013363400111340000010617213233431301016166 0ustar xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/


/*! @file
 * \brief Re-distribute A on the 2D process mesh.
 *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 * 
*/ #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *   Re-distribute A on the 2D process mesh.
 * 
 * Arguments
 * =========
 * 
 * A      (input) SuperMatrix*
 *	  The distributed input matrix A of dimension (A->nrow, A->ncol).
 *        A may be overwritten by diag(R)*A*diag(C)*Pc^T.
 *        The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE.
 *
 * ScalePermstruct (input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * Glu_freeable (input) Glu_freeable_t*
 *        The global structure describing the graph of L and U.
 * 
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
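 * colptr (output) int_t**
 *        On exit, *colptr points to a newly allocated array of n+1 column
 *        pointers (n = A->ncol) for the redistributed local matrix.
 *
 * rowind (output) int_t**
 *        On exit, *rowind points to the row indices of the local nonzeros.
 *        It is allocated only if this process ends up with nonzeros.
 *
 * a      (output) doublecomplex**
 *        On exit, *a points to the values of the local nonzeros.
 *        It is allocated only if this process ends up with nonzeros.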
 *
 * Return value
 * ============
 * 
*/ int_t zReDistribute_A(SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno, gridinfo_t *grid, int_t *colptr[], int_t *rowind[], doublecomplex *a[]) { NRformat_loc *Astore; int_t *perm_r; /* row permutation vector */ int_t *perm_c; /* column permutation vector */ int_t i, irow, fst_row, j, jcol, k, gbi, gbj, n, m_loc, jsize; int_t nnz_loc; /* number of local nonzeros */ int_t SendCnt; /* number of remote nonzeros to be sent */ int_t RecvCnt; /* number of remote nonzeros to be sent */ int_t *nnzToSend, *nnzToRecv, maxnnzToRecv; int_t *ia, *ja, **ia_send, *index, *itemp; int_t *ptr_to_send; doublecomplex *aij, **aij_send, *nzval, *dtemp; doublecomplex *nzval_a; int iam, it, p, procs; MPI_Request *send_req; MPI_Status status; /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter zReDistribute_A()"); #endif perm_r = ScalePermstruct->perm_r; perm_c = ScalePermstruct->perm_c; procs = grid->nprow * grid->npcol; Astore = (NRformat_loc *) A->Store; n = A->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; nnzToRecv = intCalloc_dist(2*procs); nnzToSend = nnzToRecv + procs; /* ------------------------------------------------------------ COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS, THEN ALLOCATE SPACE. THIS ACCOUNTS FOR THE FIRST PASS OF A. 
------------------------------------------------------------*/ for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { irow = perm_c[perm_r[i+fst_row]]; /* Row number in Pc*Pr*A */ jcol = Astore->colind[j]; gbi = BlockNum( irow ); gbj = BlockNum( jcol ); p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid ); ++nnzToSend[p]; } } /* All-to-all communication */ MPI_Alltoall( nnzToSend, 1, mpi_int_t, nnzToRecv, 1, mpi_int_t, grid->comm); maxnnzToRecv = 0; nnz_loc = SendCnt = RecvCnt = 0; for (p = 0; p < procs; ++p) { if ( p != iam ) { SendCnt += nnzToSend[p]; RecvCnt += nnzToRecv[p]; maxnnzToRecv = SUPERLU_MAX( nnzToRecv[p], maxnnzToRecv ); } else { nnz_loc += nnzToRecv[p]; /*assert(nnzToSend[p] == nnzToRecv[p]);*/ } } k = nnz_loc + RecvCnt; /* Total nonzeros ended up in my process. */ /* Allocate space for storing the triplets after redistribution. */ if ( k ) { /* count can be zero. */ if ( !(ia = intMalloc_dist(2*k)) ) ABORT("Malloc fails for ia[]."); if ( !(aij = doublecomplexMalloc_dist(k)) ) ABORT("Malloc fails for aij[]."); } ja = ia + k; /* Allocate temporary storage for sending/receiving the A triplets. 
*/ if ( procs > 1 ) { if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); if ( !(ia_send = (int_t **) SUPERLU_MALLOC(procs*sizeof(int_t*))) ) ABORT("Malloc fails for ia_send[]."); if ( !(aij_send = (doublecomplex **)SUPERLU_MALLOC(procs*sizeof(doublecomplex*))) ) ABORT("Malloc fails for aij_send[]."); if ( SendCnt ) { /* count can be zero */ if ( !(index = intMalloc_dist(2*SendCnt)) ) ABORT("Malloc fails for index[]."); if ( !(nzval = doublecomplexMalloc_dist(SendCnt)) ) ABORT("Malloc fails for nzval[]."); } if ( !(ptr_to_send = intCalloc_dist(procs)) ) ABORT("Malloc fails for ptr_to_send[]."); if ( maxnnzToRecv ) { /* count can be zero */ if ( !(itemp = intMalloc_dist(2*maxnnzToRecv)) ) ABORT("Malloc fails for itemp[]."); if ( !(dtemp = doublecomplexMalloc_dist(maxnnzToRecv)) ) ABORT("Malloc fails for dtemp[]."); } for (i = 0, j = 0, p = 0; p < procs; ++p) { if ( p != iam ) { ia_send[p] = &index[i]; i += 2 * nnzToSend[p]; /* ia/ja indices alternate */ aij_send[p] = &nzval[j]; j += nnzToSend[p]; } } } /* if procs > 1 */ if ( !(*colptr = intCalloc_dist(n+1)) ) ABORT("Malloc fails for *colptr[]."); /* ------------------------------------------------------------ LOAD THE ENTRIES OF A INTO THE (IA,JA,AIJ) STRUCTURES TO SEND. THIS ACCOUNTS FOR THE SECOND PASS OF A. ------------------------------------------------------------*/ nnz_loc = 0; /* Reset the local nonzero count. 
*/ nzval_a = Astore->nzval; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { irow = perm_c[perm_r[i+fst_row]]; /* Row number in Pc*Pr*A */ jcol = Astore->colind[j]; gbi = BlockNum( irow ); gbj = BlockNum( jcol ); p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid ); if ( p != iam ) { /* remote */ k = ptr_to_send[p]; ia_send[p][k] = irow; ia_send[p][k + nnzToSend[p]] = jcol; aij_send[p][k] = nzval_a[j]; ++ptr_to_send[p]; } else { /* local */ ia[nnz_loc] = irow; ja[nnz_loc] = jcol; aij[nnz_loc] = nzval_a[j]; ++nnz_loc; ++(*colptr)[jcol]; /* Count nonzeros in each column */ } } } /* ------------------------------------------------------------ PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION. NOTE: Can possibly use MPI_Alltoallv. ------------------------------------------------------------*/ for (p = 0; p < procs; ++p) { if ( p != iam ) { it = 2*nnzToSend[p]; MPI_Isend( ia_send[p], it, mpi_int_t, p, iam, grid->comm, &send_req[p] ); it = nnzToSend[p]; MPI_Isend( aij_send[p], it, SuperLU_MPI_DOUBLE_COMPLEX, p, iam+procs, grid->comm, &send_req[procs+p] ); } } for (p = 0; p < procs; ++p) { if ( p != iam ) { it = 2*nnzToRecv[p]; MPI_Recv( itemp, it, mpi_int_t, p, p, grid->comm, &status ); it = nnzToRecv[p]; MPI_Recv( dtemp, it, SuperLU_MPI_DOUBLE_COMPLEX, p, p+procs, grid->comm, &status ); for (i = 0; i < nnzToRecv[p]; ++i) { ia[nnz_loc] = itemp[i]; jcol = itemp[i + nnzToRecv[p]]; /*assert(jcol 1 ) { SUPERLU_FREE(send_req); SUPERLU_FREE(ia_send); SUPERLU_FREE(aij_send); if ( SendCnt ) { SUPERLU_FREE(index); SUPERLU_FREE(nzval); } SUPERLU_FREE(ptr_to_send); if ( maxnnzToRecv ) { SUPERLU_FREE(itemp); SUPERLU_FREE(dtemp); } } /* ------------------------------------------------------------ CONVERT THE TRIPLET FORMAT INTO THE CCS FORMAT. 
------------------------------------------------------------*/ if ( nnz_loc ) { /* nnz_loc can be zero */ if ( !(*rowind = intMalloc_dist(nnz_loc)) ) ABORT("Malloc fails for *rowind[]."); if ( !(*a = doublecomplexMalloc_dist(nnz_loc)) ) ABORT("Malloc fails for *a[]."); } /* Initialize the array of column pointers */ k = 0; jsize = (*colptr)[0]; (*colptr)[0] = 0; for (j = 1; j < n; ++j) { k += jsize; jsize = (*colptr)[j]; (*colptr)[j] = k; } /* Copy the triplets into the column oriented storage */ for (i = 0; i < nnz_loc; ++i) { j = ja[i]; k = (*colptr)[j]; (*rowind)[k] = ia[i]; (*a)[k] = aij[i]; ++(*colptr)[j]; } /* Reset the column pointers to the beginning of each column */ for (j = n; j > 0; --j) (*colptr)[j] = (*colptr)[j-1]; (*colptr)[0] = 0; if ( nnz_loc ) { SUPERLU_FREE(ia); SUPERLU_FREE(aij); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit zReDistribute_A()"); #endif return 0; } /* zReDistribute_A */ float pzdistribute(fact_t fact, int_t n, SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, LUstruct_t *LUstruct, gridinfo_t *grid) /* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * March 15, 2003 * * * Purpose * ======= * Distribute the matrix onto the 2D process mesh. * * Arguments * ========= * * fact (input) fact_t * Specifies whether or not the L and U structures will be re-used. * = SamePattern_SameRowPerm: L and U structures are input, and * unchanged on exit. * = DOFACT or SamePattern: L and U structures are computed and output. * * n (input) int * Dimension of the matrix. * * A (input) SuperMatrix* * The distributed input matrix A of dimension (A->nrow, A->ncol). * A may be overwritten by diag(R)*A*diag(C)*Pc^T. The type of A can be: * Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE. 
* * ScalePermstruct (input) ScalePermstruct_t* * The data structure to store the scaling and permutation vectors * describing the transformations performed to the original matrix A. * * Glu_freeable (input) *Glu_freeable_t * The global structure describing the graph of L and U. * * LUstruct (input) LUstruct_t* * Data structures for L and U factors. * * grid (input) gridinfo_t* * The 2D process mesh. * * Return value * ============ * > 0, working storage required (in bytes). * */ { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; int_t bnnz, fsupc, fsupc1, i, ii, irow, istart, j, jb, jj, k, len, len1, nsupc; int_t ljb; /* local block column number */ int_t nrbl; /* number of L blocks in current block column */ int_t nrbu; /* number of U blocks in current block column */ int_t gb; /* global block number; 0 < gb <= nsuper */ int_t lb; /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */ int iam, jbrow, kcol, mycol, myrow, pc, pr; int_t mybufmax[NBUFFERS]; NRformat_loc *Astore; doublecomplex *a; int_t *asub, *xa; int_t *xsup = Glu_persist->xsup; /* supernode and column mapping */ int_t *supno = Glu_persist->supno; int_t *lsub, *xlsub, *usub, *xusub; int_t nsupers; int_t next_lind; /* next available position in index[*] */ int_t next_lval; /* next available position in nzval[*] */ int_t *index; /* indices consist of headers and row subscripts */ int *index1; /* temporary pointer to array of int */ doublecomplex *lusup, *uval; /* nonzero values in L and U */ doublecomplex **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc) */ int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */ doublecomplex **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */ int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */ /*-- Counts to be used in factorization. --*/ int *ToRecv, *ToSendD, **ToSendR; /*-- Counts to be used in lower triangular solve. --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist; /* Column process list to send down Xk. 
*/ int_t nfrecvx = 0; /* Number of Xk I will receive. */ int_t nfsendx = 0; /* Number of Xk I will send */ int_t kseen; /*-- Counts to be used in upper triangular solve. --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist; /* Column process list to send down Xk. */ int_t nbrecvx = 0; /* Number of Xk I will receive. */ int_t nbsendx = 0; /* Number of Xk I will send */ int_t *ilsum; /* starting position of each supernode in the full array (local) */ /*-- Auxiliary arrays; freed on return --*/ int_t *rb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ int_t *Urb_length; /* U block length; size ceil(NSUPERS/Pr) */ int_t *Urb_indptr; /* pointers to U index[]; size ceil(NSUPERS/Pr) */ int_t *Urb_fstnz; /* # of fstnz in a block row; size ceil(NSUPERS/Pr) */ int_t *Ucbs; /* number of column blocks in a block row */ int_t *Lrb_length; /* L block length; size ceil(NSUPERS/Pr) */ int_t *Lrb_number; /* global block number; size ceil(NSUPERS/Pr) */ int_t *Lrb_indptr; /* pointers to L index[]; size ceil(NSUPERS/Pr) */ int_t *Lrb_valptr; /* pointers to L nzval[]; size ceil(NSUPERS/Pr) */ doublecomplex *dense, *dense_col; /* SPA */ doublecomplex zero = {0.0, 0.0}; int_t ldaspa; /* LDA of SPA */ int_t iword, dword; float mem_use = 0.0; #if ( PRNTlevel>=1 ) int_t nLblocks = 0, nUblocks = 0; #endif #if ( PROFlevel>=1 ) double t, t_u, t_l; int_t u_blks; #endif /* Initialization. */ iam = grid->iam; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0; nsupers = supno[n-1] + 1; Astore = (NRformat_loc *) A->Store; #if ( PRNTlevel>=1 ) iword = sizeof(int_t); dword = sizeof(doublecomplex); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzdistribute()"); #endif #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif zReDistribute_A(A, ScalePermstruct, Glu_freeable, xsup, supno, grid, &xa, &asub, &a); #if ( PROFlevel>=1 ) t = SuperLU_timer_() - t; if ( !iam ) printf("--------\n" ".. 
Phase 1 - ReDistribute_A time: %.2f\t\n", t); #endif if ( fact == SamePattern_SameRowPerm ) { #if ( PROFlevel>=1 ) t_l = t_u = 0; u_blks = 0; #endif /* We can propagate the new values of A into the existing L and U data structures. */ ilsum = Llu->ilsum; ldaspa = Llu->ldalsum; if ( !(dense = doublecomplexCalloc_dist(ldaspa * sp_ienv_dist(3))) ) ABORT("Calloc fails for SPA dense[]."); nrbu = CEILING( nsupers, grid->nprow ); /* No. of local block rows */ if ( !(Urb_length = intCalloc_dist(nrbu)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(nrbu)) ) ABORT("Malloc fails for Urb_indptr[]."); Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; #if ( PRNTlevel>=1 ) mem_use += 2.0*nrbu*iword + ldaspa*sp_ienv_dist(3)*dword; #endif #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /* Initialize Uval to zero. */ for (lb = 0; lb < nrbu; ++lb) { Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ index = Ufstnz_br_ptr[lb]; if ( index ) { uval = Unzval_br_ptr[lb]; len = index[1]; for (i = 0; i < len; ++i) uval[i] = zero; } /* if index != NULL */ } /* for lb ... */ for (jb = 0; jb < nsupers; ++jb) { /* Loop through each block column */ pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Scatter A into SPA (for L), or into U directly. 
*/ for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) { for (i = xa[j]; i < xa[j+1]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); if ( gb < jb ) { /* in U */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; while ( (k = index[Urb_indptr[lb]]) < jb ) { /* Skip nonzero values in this block */ Urb_length[lb] += index[Urb_indptr[lb]+1]; /* Move pointer to the next block */ Urb_indptr[lb] += UB_DESCRIPTOR + SuperSize( k ); } /*assert(k == jb);*/ /* start fstnz */ istart = Urb_indptr[lb] + UB_DESCRIPTOR; len = Urb_length[lb]; fsupc1 = FstBlockC( gb+1 ); k = j - fsupc; /* Sum the lengths of the leading columns */ for (jj = 0; jj < k; ++jj) len += fsupc1 - index[istart++]; /*assert(irow>=index[istart]);*/ uval[len + irow - index[istart]] = a[i]; } else { /* in L; put in SPA first */ irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } } /* for i ... */ dense_col += ldaspa; } /* for j ... */ #if ( PROFlevel>=1 ) t_u += SuperLU_timer_() - t; t = SuperLU_timer_(); #endif /* Gather the values of A from SPA into Lnzval[]. */ ljb = LBj( jb, grid ); /* Local block number */ index = Lrowind_bc_ptr[ljb]; if ( index ) { nrbl = index[0]; /* Number of row blocks. */ len = index[1]; /* LDA of lusup[]. */ lusup = Lnzval_bc_ptr[ljb]; next_lind = BC_HEADER; next_lval = 0; for (jj = 0; jj < nrbl; ++jj) { gb = index[next_lind++]; len1 = index[next_lind++]; /* Rows in the block. */ lb = LBi( gb, grid ); for (bnnz = 0; bnnz < len1; ++bnnz) { irow = index[next_lind++]; /* Global index. */ irow = ilsum[lb] + irow - FstBlockC( gb ); k = next_lval++; for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } /* for bnnz ... */ } /* for jj ... */ } /* if index ... */ #if ( PROFlevel>=1 ) t_l += SuperLU_timer_() - t; #endif } /* if mycol == pc */ } /* for jb ... 
*/ SUPERLU_FREE(dense); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); #if ( PROFlevel>=1 ) if ( !iam ) printf(".. 2nd distribute time: L %.2f\tU %.2f\tu_blks %d\tnrbu %d\n", t_l, t_u, u_blks, nrbu); #endif } else { /* ------------------------------------------------------------ FIRST TIME CREATING THE L AND U DATA STRUCTURES. ------------------------------------------------------------*/ #if ( PROFlevel>=1 ) t_l = t_u = 0; u_blks = 0; #endif /* We first need to set up the L and U data structures and then * propagate the values of A into them. */ lsub = Glu_freeable->lsub; /* compressed L subscripts */ xlsub = Glu_freeable->xlsub; usub = Glu_freeable->usub; /* compressed U subscripts */ xusub = Glu_freeable->xusub; if ( !(ToRecv = (int *) SUPERLU_MALLOC(nsupers * sizeof(int))) ) ABORT("Malloc fails for ToRecv[]."); for (i = 0; i < nsupers; ++i) ToRecv[i] = 0; k = CEILING( nsupers, grid->npcol );/* Number of local column blocks */ if ( !(ToSendR = (int **) SUPERLU_MALLOC(k*sizeof(int*))) ) ABORT("Malloc fails for ToSendR[]."); j = k * grid->npcol; if ( !(index1 = SUPERLU_MALLOC(j * sizeof(int))) ) ABORT("Malloc fails for index[]."); #if ( PRNTlevel>=1 ) mem_use += (float) k*sizeof(int_t*) + (j + nsupers)*iword; #endif for (i = 0; i < j; ++i) index1[i] = EMPTY; for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index1[j]; k = CEILING( nsupers, grid->nprow ); /* Number of local block rows */ /* Pointers to the beginning of each block row of U. */ if ( !(Unzval_br_ptr = (doublecomplex**)SUPERLU_MALLOC(k * sizeof(doublecomplex*))) ) ABORT("Malloc fails for Unzval_br_ptr[]."); if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Ufstnz_br_ptr[]."); if ( !(ToSendD = SUPERLU_MALLOC(k * sizeof(int))) ) ABORT("Malloc fails for ToSendD[]."); for (i = 0; i < k; ++i) ToSendD[i] = NO; if ( !(ilsum = intMalloc_dist(k+1)) ) ABORT("Malloc fails for ilsum[]."); /* Auxiliary arrays used to set up U block data structures. 
They are freed on return. */ if ( !(rb_marker = intCalloc_dist(k)) ) ABORT("Calloc fails for rb_marker[]."); if ( !(Urb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Urb_indptr[]."); if ( !(Urb_fstnz = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_fstnz[]."); if ( !(Ucbs = intCalloc_dist(k)) ) ABORT("Calloc fails for Ucbs[]."); #if ( PRNTlevel>=1 ) mem_use += 2.0*k*sizeof(int_t*) + (7*k+1)*iword; #endif /* Compute ldaspa and ilsum[]. */ ldaspa = 0; ilsum[0] = 0; for (gb = 0; gb < nsupers; ++gb) { if ( myrow == PROW( gb, grid ) ) { i = SuperSize( gb ); ldaspa += i; lb = LBi( gb, grid ); ilsum[lb + 1] = ilsum[lb] + i; } } #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /* ------------------------------------------------------------ COUNT NUMBER OF ROW BLOCKS AND THE LENGTH OF EACH BLOCK IN U. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF G(U). ------------------------------------------------------------*/ /* Loop through each supernode column. */ for (jb = 0; jb < nsupers; ++jb) { pc = PCOL( jb, grid ); fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Loop through each column in the block. */ for (j = fsupc; j < fsupc + nsupc; ++j) { /* usub[*] contains only "first nonzero" in each segment. */ for (i = xusub[j]; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero of the segment. */ gb = BlockNum( irow ); kcol = PCOL( gb, grid ); ljb = LBj( gb, grid ); if ( mycol == kcol && mycol != pc ) ToSendR[ljb][pc] = YES; pr = PROW( gb, grid ); lb = LBi( gb, grid ); if ( mycol == pc ) { if ( myrow == pr ) { ToSendD[lb] = YES; /* Count nonzeros in entire block row. */ Urb_length[lb] += FstBlockC( gb+1 ) - irow; if (rb_marker[lb] <= jb) {/* First see the block */ rb_marker[lb] = jb + 1; Urb_fstnz[lb] += nsupc; ++Ucbs[lb]; /* Number of column blocks in block row lb. */ #if ( PRNTlevel>=1 ) ++nUblocks; #endif } ToRecv[gb] = 1; } else ToRecv[gb] = 2; /* Do I need 0, 1, 2 ? 
*/ } } /* for i ... */ } /* for j ... */ } /* for jb ... */ /* Set up the initial pointers for each block row in U. */ nrbu = CEILING( nsupers, grid->nprow );/* Number of local block rows */ for (lb = 0; lb < nrbu; ++lb) { len = Urb_length[lb]; rb_marker[lb] = 0; /* Reset block marker. */ if ( len ) { /* Add room for descriptors */ len1 = Urb_fstnz[lb] + BR_HEADER + Ucbs[lb] * UB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1+1)) ) ABORT("Malloc fails for Uindex[]."); Ufstnz_br_ptr[lb] = index; if ( !(Unzval_br_ptr[lb] = doublecomplexMalloc_dist(len)) ) ABORT("Malloc fails for Unzval_br_ptr[*][]."); mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 ); mybufmax[3] = SUPERLU_MAX( mybufmax[3], len ); index[0] = Ucbs[lb]; /* Number of column blocks */ index[1] = len; /* Total length of nzval[] */ index[2] = len1; /* Total length of index[] */ index[len1] = -1; /* End marker */ } else { Ufstnz_br_ptr[lb] = NULL; Unzval_br_ptr[lb] = NULL; } Urb_length[lb] = 0; /* Reset block length. */ Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ Urb_fstnz[lb] = BR_HEADER; } /* for lb ... */ SUPERLU_FREE(Ucbs); #if ( PROFlevel>=1 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Phase 2 - setup U strut time: %.2f\t\n", t); #endif #if ( PRNTlevel>=1 ) mem_use -= 2.0*k * iword; #endif /* Auxiliary arrays used to set up L block data structures. They are freed on return. k is the number of local row blocks. */ if ( !(Lrb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Lrb_length[]."); if ( !(Lrb_number = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_number[]."); if ( !(Lrb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_indptr[]."); if ( !(Lrb_valptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_valptr[]."); if ( !(dense = doublecomplexCalloc_dist(ldaspa * sp_ienv_dist(3))) ) ABORT("Calloc fails for SPA dense[]."); /* These counts will be used for triangular solves. 
*/ if ( !(fmod = intCalloc_dist(k)) ) ABORT("Calloc fails for fmod[]."); if ( !(bmod = intCalloc_dist(k)) ) ABORT("Calloc fails for bmod[]."); /* ------------------------------------------------ */ #if ( PRNTlevel>=1 ) mem_use += 6.0*k*iword + ldaspa*sp_ienv_dist(3)*dword; #endif k = CEILING( nsupers, grid->npcol );/* Number of local block columns */ /* Pointers to the beginning of each block column of L. */ if ( !(Lnzval_bc_ptr = (doublecomplex**)SUPERLU_MALLOC(k * sizeof(doublecomplex*))) ) ABORT("Malloc fails for Lnzval_bc_ptr[]."); if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Lrowind_bc_ptr[]."); Lrowind_bc_ptr[k-1] = NULL; /* These lists of processes will be used for triangular solves. */ if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for fsendx_plist[]."); len = k * grid->nprow; if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for fsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) fsendx_plist[i] = &index[j]; if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for bsendx_plist[]."); if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for bsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) bsendx_plist[i] = &index[j]; /* -------------------------------------------------------------- */ #if ( PRNTlevel>=1 ) mem_use += 4.0*k*sizeof(int_t*) + 2.0*len*iword; #endif /*------------------------------------------------------------ PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U. ------------------------------------------------------------*/ for (jb = 0; jb < nsupers; ++jb) { /* for each block column ... 
*/ pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); ljb = LBj( jb, grid ); /* Local block number */ /* Scatter A into SPA. */ for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) { for (i = xa[j]; i < xa[j+1]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } dense_col += ldaspa; } /* for j ... */ jbrow = PROW( jb, grid ); /*------------------------------------------------ * SET UP U BLOCKS. *------------------------------------------------*/ #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif kseen = 0; dense_col = dense; /* Loop through each column in the block column. */ for (j = fsupc; j < FstBlockC( jb+1 ); ++j) { istart = xusub[j]; /* NOTE: Only the first nonzero index of the segment is stored in usub[]. */ for (i = istart; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero in the segment. */ gb = BlockNum( irow ); pr = PROW( gb, grid ); if ( pr != jbrow && myrow == jbrow && /* diag. proc. owning jb */ bsendx_plist[ljb][pr] == EMPTY ) { bsendx_plist[ljb][pr] = YES; ++nbsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; fsupc1 = FstBlockC( gb+1 ); if (rb_marker[lb] <= jb) { /* First time see the block */ rb_marker[lb] = jb + 1; Urb_indptr[lb] = Urb_fstnz[lb];; index[Urb_indptr[lb]] = jb; /* Descriptor */ Urb_indptr[lb] += UB_DESCRIPTOR; /* Record the first location in index[] of the next block */ Urb_fstnz[lb] = Urb_indptr[lb] + nsupc; len = Urb_indptr[lb];/* Start fstnz in index */ index[len-1] = 0; for (k = 0; k < nsupc; ++k) index[len+k] = fsupc1; if ( gb != jb )/* Exclude diagonal block. */ ++bmod[lb];/* Mod. 
count for back solve */ if ( kseen == 0 && myrow != jbrow ) { ++nbrecvx; kseen = 1; } } else { /* Already saw the block */ len = Urb_indptr[lb];/* Start fstnz in index */ } jj = j - fsupc; index[len+jj] = irow; /* Load the numerical values */ k = fsupc1 - irow; /* No. of nonzeros in segment */ index[len-1] += k; /* Increment block length in Descriptor */ irow = ilsum[lb] + irow - FstBlockC( gb ); for (ii = 0; ii < k; ++ii) { uval[Urb_length[lb]++] = dense_col[irow + ii]; dense_col[irow + ii] = zero; } } /* if myrow == pr ... */ } /* for i ... */ dense_col += ldaspa; } /* for j ... */ #if ( PROFlevel>=1 ) t_u += SuperLU_timer_() - t; t = SuperLU_timer_(); #endif /*------------------------------------------------ * SET UP L BLOCKS. *------------------------------------------------*/ /* Count number of blocks and length of each block. */ nrbl = 0; len = 0; /* Number of row subscripts I own. */ kseen = 0; istart = xlsub[fsupc]; for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); /* Global block number */ pr = PROW( gb, grid ); /* Process row owning this block */ if ( pr != jbrow && myrow == jbrow && /* diag. proc. owning jb */ fsendx_plist[ljb][pr] == EMPTY /* first time */ ) { fsendx_plist[ljb][pr] = YES; ++nfsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ if (rb_marker[lb] <= jb) { /* First see this block */ rb_marker[lb] = jb + 1; Lrb_length[lb] = 1; Lrb_number[nrbl++] = gb; if ( gb != jb ) /* Exclude diagonal block. */ ++fmod[lb]; /* Mod. count for forward solve */ if ( kseen == 0 && myrow != jbrow ) { ++nfrecvx; kseen = 1; } #if ( PRNTlevel>=1 ) ++nLblocks; #endif } else { ++Lrb_length[lb]; } ++len; } } /* for i ... */ if ( nrbl ) { /* Do not ensure the blocks are sorted! */ /* Set up the initial pointers for each block in index[] and nzval[]. 
*/ /* Add room for descriptors */ len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1)) ) ABORT("Malloc fails for index[]"); Lrowind_bc_ptr[ljb] = index; if (!(Lnzval_bc_ptr[ljb] = doublecomplexMalloc_dist(len*nsupc))) { fprintf(stderr, "col block " IFMT " ", jb); ABORT("Malloc fails for Lnzval_bc_ptr[*][]"); } mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 ); mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc ); mybufmax[4] = SUPERLU_MAX( mybufmax[4], len ); index[0] = nrbl; /* Number of row blocks */ index[1] = len; /* LDA of the nzval[] */ next_lind = BC_HEADER; next_lval = 0; for (k = 0; k < nrbl; ++k) { gb = Lrb_number[k]; lb = LBi( gb, grid ); len = Lrb_length[lb]; Lrb_length[lb] = 0; /* Reset vector of block length */ index[next_lind++] = gb; /* Descriptor */ index[next_lind++] = len; Lrb_indptr[lb] = next_lind; Lrb_valptr[lb] = next_lval; next_lind += len; next_lval += len; } /* Propagate the compressed row subscripts to Lindex[], and the initial values of A from SPA into Lnzval[]. */ lusup = Lnzval_bc_ptr[ljb]; len = index[1]; /* LDA of lusup[] */ for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); k = Lrb_indptr[lb]++; /* Random access a block */ index[k] = irow; k = Lrb_valptr[lb]++; irow = ilsum[lb] + irow - FstBlockC( gb ); for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } } /* for i ... */ } else { Lrowind_bc_ptr[ljb] = NULL; Lnzval_bc_ptr[ljb] = NULL; } /* if nrbl ... */ #if ( PROFlevel>=1 ) t_l += SuperLU_timer_() - t; #endif } /* if mycol == pc */ } /* for jb ... 
*/ Llu->Lrowind_bc_ptr = Lrowind_bc_ptr; Llu->Lnzval_bc_ptr = Lnzval_bc_ptr; Llu->Ufstnz_br_ptr = Ufstnz_br_ptr; Llu->Unzval_br_ptr = Unzval_br_ptr; Llu->ToRecv = ToRecv; Llu->ToSendD = ToSendD; Llu->ToSendR = ToSendR; Llu->fmod = fmod; Llu->fsendx_plist = fsendx_plist; Llu->nfrecvx = nfrecvx; Llu->nfsendx = nfsendx; Llu->bmod = bmod; Llu->bsendx_plist = bsendx_plist; Llu->nbrecvx = nbrecvx; Llu->nbsendx = nbsendx; Llu->ilsum = ilsum; Llu->ldalsum = ldaspa; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. # L blocks " IFMT "\t# U blocks " IFMT "\n", nLblocks, nUblocks); #endif SUPERLU_FREE(rb_marker); SUPERLU_FREE(Urb_fstnz); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); SUPERLU_FREE(Lrb_length); SUPERLU_FREE(Lrb_number); SUPERLU_FREE(Lrb_indptr); SUPERLU_FREE(Lrb_valptr); SUPERLU_FREE(dense); /* Find the maximum buffer size. */ MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, MPI_MAX, grid->comm); k = CEILING( nsupers, grid->nprow );/* Number of local block rows */ if ( !(Llu->mod_bit = intMalloc_dist(k)) ) ABORT("Malloc fails for mod_bit[]."); #if ( PROFlevel>=1 ) if ( !iam ) printf(".. 1st distribute time:\n " "\tL\t%.2f\n\tU\t%.2f\n" "\tu_blks %d\tnrbu %d\n--------\n", t_l, t_u, u_blks, nrbu); #endif } /* else fact != SamePattern_SameRowPerm */ if ( xa[A->ncol] > 0 ) { /* may not have any entries on this process. */ SUPERLU_FREE(asub); SUPERLU_FREE(a); } SUPERLU_FREE(xa); #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ilsum, fmod, fsendx_plist, bmod, bsendx_plist */ CHECK_MALLOC(iam, "Exit pzdistribute()"); #endif return (mem_use); } /* PZDISTRIBUTE */ SuperLU_DIST_5.3.0/SRC/dmyblas2_dist.c0000644013363400111340000001372713233431301016201 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Level 2 BLAS operations: solves and matvec, written in C * *
 * -- SuperLU routine (version 2.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * November 15, 1997
 * 
*/ /* * File name: dmyblas2.c * Purpose: * Level 2 BLAS operations: solves and matvec, written in C. * Note: * This is only used when the system lacks an efficient BLAS library. */ /*! \brief * *
 * Solves a dense UNIT lower triangular system. The unit lower 
 * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
 * The solution will be returned in the rhs vector.
 * 
*/
/*
 * dlsolve: forward substitution with a unit lower triangular matrix.
 *
 *   ldm   leading dimension of M (distance between consecutive columns)
 *   ncol  order of the triangular system
 *   M     the matrix, stored column by column; diagonal entries are never
 *         read, only the entries below the diagonal
 *   rhs   right-hand side on entry, overwritten with the solution
 *
 * Unknowns are eliminated in groups of 8, then 4, then 2.  A single
 * left-over column needs no work because its unknown is already final.
 */
void dlsolve ( int ldm, int ncol, double *M, double *rhs )
{
    const double *diag = M;   /* address of the diagonal entry of column `col` */
    int col = 0;              /* first column whose unknown is not solved yet */
    int k, r;

    /* Groups of eight columns. */
    while ( col < ncol - 7 ) {
        /* cj[r] is the entry r rows below the top of this diagonal block,
           in the j-th column of the group. */
        const double *c0 = diag;
        const double *c1 = c0 + ldm;
        const double *c2 = c1 + ldm;
        const double *c3 = c2 + ldm;
        const double *c4 = c3 + ldm;
        const double *c5 = c4 + ldm;
        const double *c6 = c5 + ldm;
        const double *c7 = c6 + ldm;
        double x0, x1, x2, x3, x4, x5, x6, x7;

        x0 = rhs[col];
        x1 = rhs[col+1] - x0 * c0[1];
        x2 = rhs[col+2] - x0 * c0[2] - x1 * c1[2];
        x3 = rhs[col+3] - x0 * c0[3] - x1 * c1[3] - x2 * c2[3];
        x4 = rhs[col+4] - x0 * c0[4] - x1 * c1[4] - x2 * c2[4]
                        - x3 * c3[4];
        x5 = rhs[col+5] - x0 * c0[5] - x1 * c1[5] - x2 * c2[5]
                        - x3 * c3[5] - x4 * c4[5];
        x6 = rhs[col+6] - x0 * c0[6] - x1 * c1[6] - x2 * c2[6]
                        - x3 * c3[6] - x4 * c4[6] - x5 * c5[6];
        x7 = rhs[col+7] - x0 * c0[7] - x1 * c1[7] - x2 * c2[7]
                        - x3 * c3[7] - x4 * c4[7] - x5 * c5[7]
                        - x6 * c6[7];

        /* rhs[col] already holds x0. */
        rhs[col+1] = x1;
        rhs[col+2] = x2;
        rhs[col+3] = x3;
        rhs[col+4] = x4;
        rhs[col+5] = x5;
        rhs[col+6] = x6;
        rhs[col+7] = x7;
        col += 8;

        /* Remove the contribution of the eight solved unknowns from the
           remaining right-hand side entries. */
        for (k = col, r = 8; k < ncol; ++k, ++r)
            rhs[k] = rhs[k] - x0 * c0[r] - x1 * c1[r]
                            - x2 * c2[r] - x3 * c3[r]
                            - x4 * c4[r] - x5 * c5[r]
                            - x6 * c6[r] - x7 * c7[r];

        diag += 8 * ldm + 8;
    }

    /* Groups of four columns. */
    while ( col < ncol - 3 ) {
        const double *c0 = diag;
        const double *c1 = c0 + ldm;
        const double *c2 = c1 + ldm;
        const double *c3 = c2 + ldm;
        double x0, x1, x2, x3;

        x0 = rhs[col];
        x1 = rhs[col+1] - x0 * c0[1];
        x2 = rhs[col+2] - x0 * c0[2] - x1 * c1[2];
        x3 = rhs[col+3] - x0 * c0[3] - x1 * c1[3] - x2 * c2[3];

        rhs[col+1] = x1;
        rhs[col+2] = x2;
        rhs[col+3] = x3;
        col += 4;

        for (k = col, r = 4; k < ncol; ++k, ++r)
            rhs[k] = rhs[k] - x0 * c0[r] - x1 * c1[r]
                            - x2 * c2[r] - x3 * c3[r];

        diag += 4 * ldm + 4;
    }

    /* At most three columns are left; handle one pair if there is one. */
    if ( col < ncol - 1 ) {
        const double *c0 = diag;
        const double *c1 = c0 + ldm;
        double x0, x1;

        x0 = rhs[col];
        x1 = rhs[col+1] - x0 * c0[1];

        rhs[col+1] = x1;
        col += 2;

        for (k = col, r = 2; k < ncol; ++k, ++r)
            rhs[k] = rhs[k] - x0 * c0[r] - x1 * c1[r];
    }

    return;
}

/*! \brief
 *
 *
 * Solves a dense upper triangular system. The upper triangular matrix is
 * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
 * in the rhs vector.
 * 
*/
/*
 * dusolve: back substitution with an upper triangular matrix.
 *
 * M is stored column by column with leading dimension ldm.  The unknowns
 * are computed from the last one to the first; each solved unknown is
 * immediately eliminated from the right-hand side entries above it.
 * The diagonal entries are used as divisors and are not checked for zero.
 */
void dusolve (
    int ldm,      /* in */
    int ncol,     /* in */
    double *M,    /* in */
    double *rhs   /* modified */
)
{
    int col, row;

    for (col = ncol - 1; col >= 0; --col) {
        const double *ucol = &M[col * ldm];       /* column `col` of M */
        const double x = rhs[col] / ucol[col];    /* divide by M(col, col) */

        rhs[col] = x;
        for (row = 0; row < col; ++row)
            rhs[row] -= x * ucol[row];            /* M(row, col) */
    }

    return;
}

/*! \brief
 *
 *
 * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
 * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
 * 
*/ void dmatvec ( int ldm, /* in -- leading dimension of M */ int nrow, /* in */ int ncol, /* in */ double *M, /* in */ double *vec, /* in */ double *Mxvec /* in/out */ ) { double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7; double *M0; register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; register int firstcol = 0; int k; M0 = &M[0]; while ( firstcol < ncol - 7 ) { /* Do 8 columns */ Mki0 = M0; Mki1 = Mki0 + ldm; Mki2 = Mki1 + ldm; Mki3 = Mki2 + ldm; Mki4 = Mki3 + ldm; Mki5 = Mki4 + ldm; Mki6 = Mki5 + ldm; Mki7 = Mki6 + ldm; vi0 = vec[firstcol++]; vi1 = vec[firstcol++]; vi2 = vec[firstcol++]; vi3 = vec[firstcol++]; vi4 = vec[firstcol++]; vi5 = vec[firstcol++]; vi6 = vec[firstcol++]; vi7 = vec[firstcol++]; for (k = 0; k < nrow; k++) Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++ + vi2 * *Mki2++ + vi3 * *Mki3++ + vi4 * *Mki4++ + vi5 * *Mki5++ + vi6 * *Mki6++ + vi7 * *Mki7++; M0 += 8 * ldm; } while ( firstcol < ncol - 3 ) { /* Do 4 columns */ Mki0 = M0; Mki1 = Mki0 + ldm; Mki2 = Mki1 + ldm; Mki3 = Mki2 + ldm; vi0 = vec[firstcol++]; vi1 = vec[firstcol++]; vi2 = vec[firstcol++]; vi3 = vec[firstcol++]; for (k = 0; k < nrow; k++) Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++ + vi2 * *Mki2++ + vi3 * *Mki3++ ; M0 += 4 * ldm; } while ( firstcol < ncol ) { /* Do 1 column */ Mki0 = M0; vi0 = vec[firstcol++]; for (k = 0; k < nrow; k++) Mxvec[k] += vi0 * *Mki0++; M0 += ldm; } return; } SuperLU_DIST_5.3.0/SRC/zSchCompUdt-cuda.c0000644013363400111340000004633313233431301016555 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief This file contains the main loop of pzgstrf which involves * rank k update of the Schur complement. * Uses CUDA GPU. * *
 * -- Distributed SuperLU routine (version 4.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 */

#define SCHEDULE_STRATEGY dynamic

/*
 * cublasCheckErrors(fn): evaluate the cuBLAS call `fn` exactly once.  If the
 * returned status is not CUBLAS_STATUS_SUCCESS, print the numeric status and
 * the source location to stderr and terminate the process with exit(1).
 *
 * The expansion intentionally does NOT end in a semicolon: the caller's own
 * `;` completes the statement, so `cublasCheckErrors(call);` is exactly one
 * statement and is safe inside an unbraced if/else.
 */
#define cublasCheckErrors(fn) \
    do { \
        cublasStatus_t cublas_status_ = (fn); \
        if (cublas_status_ != CUBLAS_STATUS_SUCCESS) { \
            fprintf(stderr, "Fatal cublas error: %d (at %s:%d)\n", \
                (int)(cublas_status_), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while(0)


if ( msg0 && msg2 ) {  /* L(:,k) and U(k,:) are not empty. */
    ldu   =0;
    full  =1;
    int cum_nrow;
    int temp_nbrow;

    lptr = lptr0;
    luptr = luptr0;
    
    nbrow= lsub[1];
    if (myrow==krow) nbrow = lsub[1]-lsub[3];

    if (nbrow>0) {
        
        int ncol_max = SUPERLU_MIN(buffer_size/nbrow,bigu_size/ldt);
        int num_streams_used,        /*number of streams that will be used*/
        ncpu_blks;                     /*Number of CPU dgemm blks*/

        int jjj, jjj_st,jjj_global;        
        for (j = jj0; j < nub; ++j) {
            arrive_at_ublock( j,&iukp,&rukp,&jb,&ljb,&nsupc,
	    		      iukp0,rukp0,usub,perm_u,xsup,grid );

            ncols =0 ;  //initialize at 0 
            jj = iukp;
            int temp_ldu=0; 
            for (; jj < iukp+nsupc; ++jj) {
                segsize = klst - usub[jj];
                if ( segsize ) {
		    ++ncols;
		}
                temp_ldu = SUPERLU_MAX(temp_ldu, segsize);
            }

            full_u_cols[j] = ncols;
            blk_ldu[j] = temp_ldu;
        } /* end for j = jj0..nub */

        jjj = jj0; /* initialization */
            
        // #pragma omp barrier 
        while ( jjj < nub ) {
            jjj_st=jjj;
#ifdef _OPENMP
#pragma omp single
#endif
            {
                ldu = blk_ldu[jjj_st];
                for (j = jjj_st; j < nub ; ++j) {
                    
                    /* prefix sum */
                    if (j != jjj_st) full_u_cols[j] += full_u_cols[j-1];

                    ldu = SUPERLU_MAX(ldu, blk_ldu[j]);   

                    /* break condition */
                    /* the number of columns that can be processed is limited by buffer size*/
                    if (full_u_cols[j]+((j+1==nub)?0:full_u_cols[j+1]) > ncol_max) {
                        break;
                    }
                } /* end for j=jjj_st to nub */  

                jjj_global = SUPERLU_MIN(nub, j+1); /* Maximum value of jjj will be nub */
                
                // TAU_STATIC_TIMER_START("work_divison");
                /* Divide CPU-GPU gemm here */
                gemm_division_cpu_gpu(
		       &num_streams_used, /*number of streams that will be used*/
		       stream_end_col,    /*array holding last column blk for each partition*/
		       &ncpu_blks,        /*Number of CPU gemm blks*/
		       			  /*input*/
		       nbrow,             /*number of row in A matrix*/
		       ldu,               /*number of k in dgemm*/
		       nstreams,
		       full_u_cols + jjj_st, /*array containing prefix sum of work load*/
		       jjj_global-jjj_st     /*Number of work load */
                );
                // TAU_STATIC_TIMER_STOP("work_divison");

            } /* pragma omp single */

            jjj = jjj_global;
            // printf("thread_id %d, jjj %d \n",thread_id,jjj );
            if (jjj == jjj_st+1 && full_u_cols[jjj_st] > ncol_max) {
                printf("allocate more memory for buffer !!!!\n");
                if(nbrow * full_u_cols[jjj_st] > buffer_size)
                    printf("%d buffer_size %d\n",nbrow*full_u_cols[jjj_st],buffer_size );
            }
            
            // #pragma omp barrier 
            /* gathering circuit */
            assert(jjj_st 0 ) {
#ifdef PI_DEBUG
		printf("nbrow %d *ldu %d  =%d < ldt %d * max_row_size %d =%d \n",nbrow,ldu,nbrow*ldu,ldt,max_row_size,ldt*max_row_size );
		assert(nbrow*ldu<=ldt*max_row_size);
#endif 
		cudaMemcpy2DAsync(dA, nbrow*sizeof(doublecomplex),
				  &lusup[luptr+(knsupc-ldu)*nsupr],
				  nsupr*sizeof(doublecomplex), nbrow*sizeof(doublecomplex),
				  ldu, cudaMemcpyHostToDevice, streams[0]);
	    }
                
	    for (int i = 0; i < num_streams_used; ++i) {
		int st = (i==0) ? ncpu_blks+jjj_st : jjj_st+stream_end_col[i-1]; 
		int st_col = full_u_cols[st-1];
		int num_col_stream = full_u_cols[jjj_st+stream_end_col[i]-1]-full_u_cols[st-1];
		tempu = bigU;
                    
		doublecomplex *tempv1 = bigV + full_u_cols[st-1]*nbrow;

		/* Following is for testing purpose */
#ifdef GPU_ACC
		int stream_id = i;
		int b_offset  = ldu * st_col;
		int c_offset  = st_col * nbrow;
		size_t B_stream_size = ldu * num_col_stream * sizeof(doublecomplex);
		size_t C_stream_size = nbrow * num_col_stream * sizeof(doublecomplex);
		
		assert(ldu*(st_col+num_col_stream) < bigu_size);
		assert(nbrow*(st_col+num_col_stream) < buffer_size);
		
		cudaMemcpyAsync(dB+b_offset, tempu+b_offset, B_stream_size,
				cudaMemcpyHostToDevice, streams[stream_id]);
		
		cublasCheckErrors(
				  cublasSetStream(handle[stream_id],
						  streams[stream_id])
				  );
		
		cublasCheckErrors(
				  cublasZgemm(handle[stream_id],
					      CUBLAS_OP_N, CUBLAS_OP_N,
					      nbrow, num_col_stream, ldu,
 					      (const cuDoubleComplex*) &alpha,
					      (const cuDoubleComplex*) dA,
					      nbrow,
					      (const cuDoubleComplex*) &dB[b_offset], 
					      ldu,
					      (const cuDoubleComplex*) &beta,
					      (cuDoubleComplex*)&dC[c_offset],
                                              nbrow)
				  );
		
		checkCuda( cudaMemcpyAsync(tempv1, dC+c_offset,
					   C_stream_size,
					   cudaMemcpyDeviceToHost,
					   streams[stream_id]) );
#else 
		if ( num_col_stream > 0 ) {   
		    my_zgemm_("N", "N", &nbrow, &num_col_stream, &ldu,
			      &alpha, &lusup[luptr+(knsupc-ldu)*nsupr],
			      &nsupr, tempu+ldu*st_col, &ldu, &beta,
			      tempv1, &nbrow, 1, 1);
		}
		
#endif 
		
	    } /* end for i = 1 to num_streams used */
	    
	    int num_col = full_u_cols[jjj_st+ncpu_blks-1];
	    int st_col = 0;        /*special case for cpu */
	    tempv = bigV + nbrow * st_col;
	    tempu = bigU;
	    
	    double tstart = SuperLU_timer_();
#if defined (USE_VENDOR_BLAS)            
	    zgemm_("N", "N", &nbrow, &num_col, &ldu, &alpha,
		  &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr,
		  tempu+ldu*st_col, &ldu, &beta, tempv, &nbrow, 1, 1);
#else
	    zgemm_("N", "N", &nbrow, &num_col, &ldu, &alpha,
		  &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr,
		  tempu+ldu*st_col, &ldu, &beta, tempv, &nbrow);
#endif
	    gemm_timer += SuperLU_timer_() -tstart;
	    stat->ops[FACT] += 2 * nbrow * ldu * full_u_cols[jjj-1];
	    
	    // printf("after zgemm \n");
	    
            /* Now scattering blocks handled by cpu */
            int temp_ncol;
	    
            /* Scatter the first blocks, which the CPU has already computed. */
            tstart = SuperLU_timer_();

#ifdef _OPENMP
#pragma omp parallel  \
    private(j,iukp,rukp, tempu, tempv, cum_nrow, jb, nsupc,ljb,	\
	    segsize,lead_zero,					\
	    ib, temp_nbrow,ilst,lib,index,			\
	    ijb,fnz,ucol,rel,ldv,lptrj,luptrj,			\
	    nzval,     lb ,                     jj, i)		\
    firstprivate(luptr,lptr) default (shared)
#endif
            {
                int thread_id = omp_get_thread_num();
        
                int* indirect_thread = indirect + ldt*thread_id;
                int* indirect2_thread = indirect2 + ldt*thread_id;
                doublecomplex* tempv1;
                
                if (ncpu_blks< omp_get_num_threads()) {
                    // TAU_STATIC_TIMER_START("SPECIAL_CPU_SCATTER");
                    
                    for (j = jjj_st; j < jjj_st+ncpu_blks; ++j) {
                        /* code */
                        #ifdef PI_DEBUG
                            printf("scattering %d  block column\n",j);
                        #endif

                        /* == processing each of the remaining columns == */

                        if(j==jjj_st) tempv1 = bigV;
                        else tempv1 = bigV + full_u_cols[j-1]*nbrow;

                        arrive_at_ublock( j,&iukp,&rukp,&jb,&ljb,&nsupc,
					  iukp0,rukp0,usub,perm_u,xsup,grid );

                        cum_nrow =0 ;

                        /* do update with the kth column of L and (k,j)th block of U */
                        lptr = lptr0;
                        luptr = luptr0;

#ifdef _OPENMP
#pragma omp for schedule( SCHEDULE_STRATEGY ) nowait
#endif
                        for (lb = 0; lb < nlb; lb++ ) {
                            int cum_nrow = 0;
                            int temp_nbrow;
                            lptr = lptr0;
                            luptr = luptr0;
                            for (int i = 0; i < lb; ++i) {
                                ib = lsub[lptr];        /* Row block L(i,k). */
                                temp_nbrow = lsub[lptr+1];   /* Number of full rows. */
                                lptr += LB_DESCRIPTOR;  /* Skip descriptor. */
                                lptr += temp_nbrow;
                                luptr += temp_nbrow;
                                cum_nrow +=temp_nbrow;
                            }

                            ib = lsub[lptr];       /* Row block L(i,k). */
                            temp_nbrow = lsub[lptr+1];  /* Number of full rows. */
                            assert(temp_nbrow<=nbrow);

                            lptr += LB_DESCRIPTOR; /* Skip descriptor. */

                            /* Now gather the result into the destination block. */
                            if ( ib < jb ) {  /* A(i,j) is in U. */
                                #ifdef PI_DEBUG
                                    printf("cpu scatter \n");
                                    printf("A(%d,%d) goes to U block %d \n", ib,jb,ljb);
                                #endif

                                tempv = tempv1+cum_nrow;
                                zscatter_u (
						 ib,jb,
						 nsupc,iukp,xsup,
						 klst,nbrow,
						 lptr,temp_nbrow,lsub,
						 usub,tempv,
						 Ufstnz_br_ptr,
						 Unzval_br_ptr,
						 grid
						 );
                            } else {    /* A(i,j) is in L. */
#ifdef PI_DEBUG
                                printf("cpu scatter \n");
                                printf("A(%d,%d) goes to L block %d \n", ib,jb,ljb);
#endif
                                
                                tempv = tempv1+cum_nrow;

                                zscatter_l (
						 ib, ljb,nsupc,iukp,xsup,klst,nbrow,lptr,
						 temp_nbrow,usub,lsub,tempv,
						 indirect_thread,indirect2_thread,
						 Lrowind_bc_ptr,Lnzval_bc_ptr,grid
						 );
                            } /* if ib < jb ... */

                            lptr += temp_nbrow;
                            luptr += temp_nbrow;
                            cum_nrow += temp_nbrow;

                        } /* for lb ... */

                        luptr=luptr0;
                    } /* for j = jjj_st ... */

                    // TAU_STATIC_TIMER_STOP("SPECIAL_CPU_SCATTER");
                } else {
#ifdef _OPENMP
#pragma omp for schedule(SCHEDULE_STRATEGY) nowait
#endif
                    for (j = jjj_st; j < jjj_st+ncpu_blks; ++j) {
                        /* code */
                        #ifdef PI_DEBUG
                            printf("scattering %d  block column\n",j);
                        #endif 

                        /* == processing each of the remaining columns == */
                        if(j==jjj_st) tempv1 = bigV;
                        else tempv1 = bigV + full_u_cols[j-1]*nbrow;

                        arrive_at_ublock( j,&iukp,&rukp,&jb,&ljb,&nsupc,
					  iukp0,rukp0,usub,perm_u,xsup,grid );
                        cum_nrow =0 ;

                        /* do update with the kth column of L and (k,j)th block of U */
                        lptr = lptr0;
                        luptr = luptr0;

                        for (lb = 0; lb < nlb; lb++ ) {
                            ib = lsub[lptr];       /* Row block L(i,k). */
                            temp_nbrow = lsub[lptr+1];  /* Number of full rows. */
                            assert(temp_nbrow<=nbrow);

                            lptr += LB_DESCRIPTOR; /* Skip descriptor. */
#ifdef DGEMM_STAT
			    if(j==jjj_st) {
				temp_ncol = full_u_cols[j];
			    } else {
				temp_ncol = full_u_cols[j]- full_u_cols[j-1];  
			    }
			    printf("%d %d %d \n",temp_nbrow, temp_ncol,ldu);
#endif

			    /* Now gather the result into the destination block. */
			    if ( ib < jb ) {  /* A(i,j) is in U. */
#ifdef PI_DEBUG
				printf("cpu scatter \n");
				printf("A(%d,%d) goes to U block %d \n", ib,jb,ljb);
#endif

				tempv = tempv1+cum_nrow;
                                zscatter_u (
						 ib,jb,
						 nsupc,iukp,xsup,
						 klst,nbrow,
						 lptr,temp_nbrow,lsub,
						 usub,tempv,
						 Ufstnz_br_ptr,
						 Unzval_br_ptr,
						 grid
						 );
			    } else {    /* A(i,j) is in L. */
#ifdef PI_DEBUG
                                printf("cpu scatter \n");
                                printf("A(%d,%d) goes to L block %d \n", ib,jb,ljb);
#endif
                                tempv = tempv1+cum_nrow;

                                zscatter_l (
						 ib, ljb,nsupc,iukp,xsup,klst,nbrow,lptr,
						 temp_nbrow,usub,lsub,tempv,
						 indirect_thread,indirect2_thread,
						 Lrowind_bc_ptr,Lnzval_bc_ptr,grid
						 );
			    } /* if ib < jb ... */

			    lptr += temp_nbrow;
			    luptr += temp_nbrow;
			    cum_nrow += temp_nbrow;
			
			} /* for lb ... */

			luptr=luptr0;
		    } /* for j = jjj_st ... */
		}     /* else if (ncpu_blks >= omp_get_num_threads()) */
	    }         /* parallel region */

	    scatter_timer += SuperLU_timer_() - tstart; 
#ifdef _OPENMP
#pragma omp parallel							\
    private(j,iukp,rukp, tempu, tempv, cum_nrow, jb, nsupc,ljb,		\
	    segsize,lead_zero,						\
	    ib, temp_nbrow,ilst,lib,index,				\
	    ijb,fnz,ucol,rel,ldv,lptrj,luptrj,				\
	    nzval,     lb ,                     jj, i)			\
    firstprivate(luptr,lptr) default (shared)
#endif
            {
                int thread_id = omp_get_thread_num();
        
                int* indirect_thread = indirect + ldt*thread_id;
                int* indirect2_thread = indirect2 + ldt*thread_id;
                doublecomplex* tempv1;
                for(i = 0; i < num_streams_used; i++) { /* i is private variable */
                    checkCuda(cudaStreamSynchronize (streams[i]));
                    int jjj_st1 = (i==0) ? jjj_st + ncpu_blks : jjj_st + stream_end_col[i-1];
                    int jjj_end = jjj_st + stream_end_col[i];
                    assert(jjj_end-1jjj_st) ;

                    /* now scatter it */
#pragma omp for schedule( SCHEDULE_STRATEGY ) nowait 
                    for (j = jjj_st1; j < jjj_end; ++j) {
                        /* code */
#ifdef PI_DEBUG
			printf("scattering %d  block column\n",j);
#endif 
                        /* == processing each of the remaining columns == */

                        if(j==jjj_st) tempv1 = bigV;
                        else tempv1 = bigV + full_u_cols[j-1]*nbrow;

                        arrive_at_ublock( j,&iukp,&rukp,&jb,&ljb,&nsupc,
					  iukp0,rukp0,usub,perm_u,xsup,grid );
                        cum_nrow =0 ;

                        /* do update with the kth column of L and (k,j)th block of U */
                        lptr = lptr0;
                        luptr = luptr0;
                        for (lb = 0; lb < nlb; lb++) {
                            ib = lsub[lptr];       /* Row block L(i,k). */
                            temp_nbrow = lsub[lptr+1];  /* Number of full rows. */
                            assert(temp_nbrow<=nbrow);

                            lptr += LB_DESCRIPTOR; /* Skip descriptor. */
#ifdef DGEMM_STAT
			    if(j==jjj_st) {
				temp_ncol = full_u_cols[j];
			    } else {
				temp_ncol = full_u_cols[j]- full_u_cols[j-1];  
			    }
			    printf("%d %d %d \n",temp_nbrow, temp_ncol,ldu);
#endif

                            /* Now gather the result into the destination block. */
                            if ( ib < jb ) { /* A(i,j) is in U. */
#ifdef PI_DEBUG
				printf("gpu scatter \n");
				printf("A(%d,%d) goes to U block %d \n", ib,jb,ljb);
#endif
                                tempv = tempv1+cum_nrow;
                                zscatter_u (
						 ib,jb,
						 nsupc,iukp,xsup,
						 klst,nbrow,
						 lptr,temp_nbrow,lsub,
						 usub,tempv,
						 Ufstnz_br_ptr,
						 Unzval_br_ptr,
						 grid
						 );
                            } else {    /* A(i,j) is in L. */
#ifdef PI_DEBUG
                                printf("gpu scatter \n");
                                printf("A(%d,%d) goes to L block %d \n", ib,jb,ljb);
#endif
                                tempv = tempv1+cum_nrow;

                                zscatter_l (
						 ib, ljb,nsupc,iukp,xsup,klst,nbrow,lptr,
						 temp_nbrow,usub,lsub,tempv,
						 indirect_thread,indirect2_thread,
						 Lrowind_bc_ptr,Lnzval_bc_ptr,grid
						 );
                            } /* if ib < jb ... */

                            lptr += temp_nbrow;
                            luptr += temp_nbrow;
                            cum_nrow += temp_nbrow;
			    
                        } /* for lb ... */

                        luptr=luptr0;
                    } /* for j = jjj_st ... */
                    
                } /* end for i = 0 to nstreams */
                // TAU_STATIC_TIMER_STOP("GPU_SCATTER");
                // TAU_STATIC_TIMER_STOP("INSIDE_OMP");
            } /* end pragma omp parallel */
            // TAU_STATIC_TIMER_STOP("OUTSIDE_OMP");
        }  /* end while(jjj0 */

 }   /* if msg1 and msg 2 */



SuperLU_DIST_5.3.0/SRC/pdgsequ.c0000644013363400111340000001644513233431301015111 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/


/*! @file 
 * \brief Computes row and column scalings
 *
 * File name:	pdgsequ.c
 * History:     Modified from LAPACK routine DGEEQU
 */
#include 
#include "superlu_ddefs.h"

/*! \brief

 
    
    Purpose   
    =======   

    PDGSEQU computes row and column scalings intended to equilibrate an   
    M-by-N sparse matrix A and reduce its condition number. R returns the row
    scale factors and C the column scale factors, chosen to try to make   
    the largest element in each row and column of the matrix B with   
    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   

    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
    number and BIGNUM = largest safe number.  Use of these scaling   
    factors is not guaranteed to reduce the condition number of A but   
    works well in practice.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.
 
    Arguments   
    =========   

    A       (input) SuperMatrix*
            The matrix of dimension (A->nrow, A->ncol) whose equilibration
            factors are to be computed. The type of A can be:
            Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
	    
    R       (output) double*, size A->nrow
            If INFO = 0 or INFO > M, R contains the row scale factors   
            for A.
	    
    C       (output) double*, size A->ncol
            If INFO = 0,  C contains the column scale factors for A.
	    
    ROWCND  (output) double*
            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
            AMAX is neither too large nor too small, it is not worth   
            scaling by R.
	    
    COLCND  (output) double*
            If INFO = 0, COLCND contains the ratio of the smallest   
            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
            worth scaling by C.
	    
    AMAX    (output) double*
            Absolute value of largest matrix element.  If AMAX is very   
            close to overflow or very close to underflow, the matrix   
            should be scaled.
	    
    INFO    (output) int*
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i,  and i is   
                  <= M:  the i-th row of A is exactly zero   
                  >  M:  the (i-M)-th column of A is exactly zero   

    GRID    (input) gridinfo_t*
            The 2D process mesh.
    ===================================================================== 
*/ void pdgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd, double *colcnd, double *amax, int_t *info, gridinfo_t *grid) { /* Local variables */ NRformat_loc *Astore; double *Aval; int i, j, irow, jcol, m_loc; double rcmin, rcmax; double bignum, smlnum; double tempmax, tempmin; double *loc_max; int *r_sizes, *displs; double *loc_r; int_t procs; /* Test the input parameters. */ *info = 0; if ( A->nrow < 0 || A->ncol < 0 || A->Stype != SLU_NR_loc || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -1; if (*info != 0) { i = -(*info); pxerr_dist("pdgsequ", grid, i); return; } /* Quick return if possible */ if ( A->nrow == 0 || A->ncol == 0 ) { *rowcnd = 1.; *colcnd = 1.; *amax = 0.; return; } Astore = A->Store; Aval = Astore->nzval; m_loc = Astore->m_loc; /* Get machine constants. */ smlnum = dmach_dist("S"); bignum = 1. / smlnum; /* Compute row scale factors. */ for (i = 0; i < A->nrow; ++i) r[i] = 0.; /* Find the maximum element in each row. */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) r[irow] = SUPERLU_MAX( r[irow], fabs(Aval[j]) ); ++irow; } /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; for (i = Astore->fst_row; i < Astore->fst_row + m_loc; ++i) { rcmax = SUPERLU_MAX(rcmax, r[i]); rcmin = SUPERLU_MIN(rcmin, r[i]); } /* Get the global MAX and MIN for R */ tempmax = rcmax; tempmin = rcmin; MPI_Allreduce( &tempmax, &rcmax, 1, MPI_DOUBLE, MPI_MAX, grid->comm); MPI_Allreduce( &tempmin, &rcmin, 1, MPI_DOUBLE, MPI_MIN, grid->comm); *amax = rcmax; if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ for (i = 0; i < A->nrow; ++i) if (r[i] == 0.) { *info = i + 1; return; } } else { /* Invert the scale factors. */ for (i = 0; i < A->nrow; ++i) r[i] = 1. 
/ SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum ); /* Compute ROWCND = min(R(I)) / max(R(I)) */ *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum ); } /* Compute column scale factors */ for (j = 0; j < A->ncol; ++j) c[j] = 0.; /* Find the maximum element in each column, assuming the row scalings computed above. */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; c[jcol] = SUPERLU_MAX( c[jcol], fabs(Aval[j]) * r[irow] ); } ++irow; } /* Find the global maximum for c[j] */ if ( !(loc_max = doubleMalloc_dist(A->ncol))) ABORT("Malloc fails for loc_max[]."); for (j = 0; j < A->ncol; ++j) loc_max[j] = c[j]; MPI_Allreduce(loc_max, c, A->ncol, MPI_DOUBLE, MPI_MAX, grid->comm); SUPERLU_FREE(loc_max); /* Find the maximum and minimum scale factors. */ rcmin = bignum; rcmax = 0.; for (j = 0; j < A->ncol; ++j) { rcmax = SUPERLU_MAX(rcmax, c[j]); rcmin = SUPERLU_MIN(rcmin, c[j]); } if (rcmin == 0.) { /* Find the first zero scale factor and return an error code. */ for (j = 0; j < A->ncol; ++j) if ( c[j] == 0. ) { *info = A->nrow + j + 1; return; } } else { /* Invert the scale factors. */ for (j = 0; j < A->ncol; ++j) c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum); /* Compute COLCND = min(C(J)) / max(C(J)) */ *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum ); } /* gather R from each process to get the global R. */ procs = grid->nprow * grid->npcol; if ( !(r_sizes = SUPERLU_MALLOC(2 * procs * sizeof(int)))) ABORT("Malloc fails for r_sizes[]."); displs = r_sizes + procs; if ( !(loc_r = doubleMalloc_dist(m_loc))) ABORT("Malloc fails for loc_r[]."); j = Astore->fst_row; for (i = 0; i < m_loc; ++i) loc_r[i] = r[j++]; /* First gather the size of each piece. 
*/ MPI_Allgather(&m_loc, 1, MPI_INT, r_sizes, 1, MPI_INT, grid->comm); /* Set up the displacements for allgatherv */ displs[0] = 0; for (i = 1; i < procs; ++i) displs[i] = displs[i-1] + r_sizes[i-1]; /* Now gather the actual data */ MPI_Allgatherv(loc_r, m_loc, MPI_DOUBLE, r, r_sizes, displs, MPI_DOUBLE, grid->comm); SUPERLU_FREE(r_sizes); SUPERLU_FREE(loc_r); return; } /* pdgsequ */ SuperLU_DIST_5.3.0/SRC/dreadMM.c0000644013363400111340000001357613233431301014754 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * Contributed by Francois-Henry Rouet. * */ #include #include #include "superlu_ddefs.h" #undef EXPAND_SYM /*! brief * *
 * Output parameters
 * =================
 *   (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of
 *      nonzeros in columns of matrix A; (*nzval)[*] the numerical values;
 *	column i of A is given by (*nzval)[k], k = (*colptr)[i],...,
 *      (*colptr)[i+1]-1.
 * 
*/ void dreadMM_dist(FILE *fp, int_t *m, int_t *n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { int_t j, k, jsize, nnz, nz, new_nonz; double *a, *val; int_t *asub, *xa, *row, *col; int_t zero_base = 0; char *p, line[512], banner[64], mtx[64], crd[64], arith[64], sym[64]; int expand; char *cs; /* File format: * %%MatrixMarket matrix coordinate real general/symmetric/... * % ... * % (optional comments) * % ... * #rows #non-zero * Triplet in the rest of lines: row col value */ /* 1/ read header */ cs = fgets(line,512,fp); for (p=line; *p!='\0'; *p=tolower(*p),p++); if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, arith, sym) != 5) { printf("Invalid header (first line does not contain 5 tokens)\n"); exit; } if(strcmp(banner,"%%matrixmarket")) { printf("Invalid header (first token is not \"%%%%MatrixMarket\")\n"); exit(-1); } if(strcmp(mtx,"matrix")) { printf("Not a matrix; this driver cannot handle that.\n"); exit(-1); } if(strcmp(crd,"coordinate")) { printf("Not in coordinate format; this driver cannot handle that.\n"); exit(-1); } if(strcmp(arith,"real")) { if(!strcmp(arith,"complex")) { printf("Complex matrix; use zreadMM instead!\n"); exit(-1); } else if(!strcmp(arith, "pattern")) { printf("Pattern matrix; values are needed!\n"); exit(-1); } else { printf("Unknown arithmetic\n"); exit(-1); } } if(strcmp(sym,"general")) { printf("Symmetric matrix: will be expanded\n"); expand=1; } else expand=0; /* 2/ Skip comments */ while(banner[0]=='%') { cs = fgets(line,512,fp); sscanf(line,"%s",banner); } /* 3/ Read n and nnz */ #ifdef _LONGINT sscanf(line, "%ld%ld%ld",m, n, nonz); #else sscanf(line, "%d%d%d",m, n, nonz); #endif if(*m!=*n) { printf("Rectangular matrix!. 
Abort\n"); exit(-1); } if(expand) new_nonz = 2 * *nonz - *n; else new_nonz = *nonz; *m = *n; printf("m %lld, n %lld, nonz %lld\n", (long long) *m, (long long) *n, (long long) *nonz); fflush(stdout); dallocateA_dist(*n, new_nonz, nzval, rowind, colptr); /* Allocate storage */ a = *nzval; asub = *rowind; xa = *colptr; if ( !(val = doubleMalloc_dist(new_nonz)) ) ABORT("Malloc fails for val[]"); if ( !(row = (int_t *) intMalloc_dist(new_nonz)) ) ABORT("Malloc fails for row[]"); if ( !(col = (int_t *) intMalloc_dist(new_nonz)) ) ABORT("Malloc fails for col[]"); for (j = 0; j < *n; ++j) xa[j] = 0; /* 4/ Read triplets of values */ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { #ifdef _LONGINT j = fscanf(fp, "%lld%lld%lf\n", &row[nz], &col[nz], &val[nz]); #else j = fscanf(fp, "%d%d%lf\n", &row[nz], &col[nz], &val[nz]); #endif if ( nnz == 0 ) /* first nonzero */ { if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else printf("triplet file: row/col indices are one-based.\n"); fflush(stdout); } if ( !zero_base ) { /* Change to 0-based indexing. 
*/ --row[nz]; --col[nz]; } if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n /*|| val[nz] == 0.*/) { fprintf(stderr, "nz " IFMT ", (" IFMT ", " IFMT ") = %e out of bound, removed\n", nz, row[nz], col[nz], val[nz]); exit(-1); } else { ++xa[col[nz]]; if(expand) { if ( row[nz] != col[nz] ) { /* Excluding diagonal */ ++nz; row[nz] = col[nz-1]; col[nz] = row[nz-1]; val[nz] = val[nz-1]; ++xa[col[nz]]; } } ++nz; } } *nonz = nz; if(expand) { printf("new_nonz after symmetric expansion:\t" IFMT "\n", *nonz); fflush(stdout); } /* Initialize the array of column pointers */ k = 0; jsize = xa[0]; xa[0] = 0; for (j = 1; j < *n; ++j) { k += jsize; jsize = xa[j]; xa[j] = k; } /* Copy the triplets into the column oriented storage */ for (nz = 0; nz < *nonz; ++nz) { j = col[nz]; k = xa[j]; asub[k] = row[nz]; a[k] = val[nz]; ++xa[j]; } /* Reset the column pointers to the beginning of each column */ for (j = *n; j > 0; --j) xa[j] = xa[j-1]; xa[0] = 0; SUPERLU_FREE(val); SUPERLU_FREE(row); SUPERLU_FREE(col); #ifdef CHK_INPUT int i; for (i = 0; i < *n; i++) { printf("Col %d, xa %d\n", i, xa[i]); for (k = xa[i]; k < xa[i+1]; k++) printf("%d\t%16.10f\n", asub[k], a[k]); } #endif } static void dreadrhs(int m, double *b) { FILE *fp, *fopen(); int i; if ( !(fp = fopen("b.dat", "r")) ) { fprintf(stderr, "dreadrhs: file does not exist\n"); exit(-1); } for (i = 0; i < m; ++i) i = fscanf(fp, "%lf\n", &b[i]); /*fscanf(fp, "%d%lf\n", &j, &b[i]);*/ /* readpair_(j, &b[i]);*/ fclose(fp); } SuperLU_DIST_5.3.0/SRC/dreadhb.c0000644013363400111340000002653613233431301015034 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include #include #include #include "superlu_ddefs.h" /* * Prototypes */ static void ReadVector(FILE *, int_t, int_t *, int_t, int_t); static void dReadValues(FILE *, int_t, double *, int_t, int_t); extern void FormFullA(int_t, int_t *, double **, int_t **, int_t **); static int DumpLine(FILE *); static int ParseIntFormat(char *, int_t *, int_t *); static int ParseFloatFormat(char *, int_t *, int_t *); /*! \brief * *
 * Purpose
 * =======
 * 
 * Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format 
 * as described below.
 * 
 * Line 1 (A72,A8) 
 *  	Col. 1 - 72   Title (TITLE) 
 *	Col. 73 - 80  Key (KEY) 
 * 
 * Line 2 (5I14) 
 * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
 * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
 * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
 * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
 *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
 *                    (including starting guesses and solution vectors 
 *		       if present) 
 *           	      (zero indicates no right-hand side data is present) 
 *
 * Line 3 (A3, 11X, 4I14) 
 *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
 * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
 * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
 *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
 *	              (equal to number of entries for assembled matrices) 
 * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
 *	              (zero in the case of assembled matrices) 
 * Line 4 (2A16, 2A20) 
 * 	Col. 1 - 16   Format for pointers (PTRFMT) 
 *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
 *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
 * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
 *
 * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
 *    	Col. 1 	      Right-hand side type: 
 *	         	  F for full storage or M for same format as matrix 
 *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
 *    	Col. 3        X if an exact solution vector(s) is supplied. 
 *	Col. 15 - 28  Number of right-hand sides (NRHS) 
 *	Col. 29 - 42  Number of row indices (NRHSIX) 
 *          	      (ignored in case of unassembled matrices) 
 *
 * The three character type field on line 3 describes the matrix type. 
 * The following table lists the permitted values for each of the three 
 * characters. As an example of the type field, RSA denotes that the matrix 
 * is real, symmetric, and assembled. 
 *
 * First Character: 
 *	R Real matrix 
 *	C Complex matrix 
 *	P Pattern only (no numerical values supplied) 
 *
 * Second Character: 
 *	S Symmetric 
 *	U Unsymmetric 
 *	H Hermitian 
 *	Z Skew symmetric 
 *	R Rectangular 
 *
 * Third Character: 
 *	A Assembled 
 *	E Elemental matrices (unassembled) 
 * 
*/ void dreadhb_dist(int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { register int_t i, numer_lines, rhscrd = 0; int_t tmp, colnum, colsize, rownum, rowsize, valnum, valsize; char buf[100], type[4]; int_t sym; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Enter dreadhb_dist()"); #endif /* Line 1 */ fgets(buf, 100, fp); /* Line 2 */ for (i=0; i<5; i++) { fscanf(fp, "%14c", buf); buf[14] = 0; tmp = atoi(buf); /*sscanf(buf, "%d", &tmp);*/ if (i == 3) numer_lines = tmp; if (i == 4 && tmp) rhscrd = tmp; } DumpLine(fp); /* Line 3 */ fscanf(fp, "%3c", type); fscanf(fp, "%11c", buf); /* pad */ type[3] = 0; #if ( DEBUGlevel>=1 ) if ( !iam ) printf("Matrix type %s\n", type); #endif fscanf(fp, "%14c", buf); *nrow = atoi(buf); fscanf(fp, "%14c", buf); *ncol = atoi(buf); fscanf(fp, "%14c", buf); *nonz = atoi(buf); fscanf(fp, "%14c", buf); tmp = atoi(buf); if (tmp != 0) if ( !iam ) printf("This is not an assembled matrix!\n"); if (*nrow != *ncol) if ( !iam ) printf("Matrix is not square.\n"); DumpLine(fp); /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */ dallocateA_dist(*ncol, *nonz, nzval, rowind, colptr); /* Line 4: format statement */ fscanf(fp, "%16c", buf); ParseIntFormat(buf, &colnum, &colsize); fscanf(fp, "%16c", buf); ParseIntFormat(buf, &rownum, &rowsize); fscanf(fp, "%20c", buf); ParseFloatFormat(buf, &valnum, &valsize); fscanf(fp, "%20c", buf); DumpLine(fp); /* Line 5: right-hand side */ if ( rhscrd ) DumpLine(fp); /* skip RHSFMT */ #if ( DEBUGlevel>=1 ) if ( !iam ) { printf(IFMT " rows, " IFMT " nonzeros\n", *nrow, *nonz); printf("colnum " IFMT ", colsize " IFMT "\n", colnum, colsize); printf("rownum " IFMT ", rowsize " IFMT "\n", rownum, rowsize); printf("valnum " IFMT ", valsize " IFMT "\n", valnum, valsize); } #endif ReadVector(fp, *ncol+1, *colptr, colnum, colsize); #if ( DEBUGlevel>=1 ) if ( !iam ) printf("read colptr[" IFMT "] = " IFMT "\n", *ncol, (*colptr)[*ncol]); #endif ReadVector(fp, 
*nonz, *rowind, rownum, rowsize); #if ( DEBUGlevel>=1 ) if ( !iam ) printf("read rowind[" IFMT "] = " IFMT "\n", *nonz-1, (*rowind)[*nonz-1]); #endif if ( numer_lines ) { dReadValues(fp, *nonz, *nzval, valnum, valsize); #if ( DEBUGlevel>=1 ) if ( !iam ) printf("read nzval[" IFMT "] = %e\n", *nonz-1, (*nzval)[*nonz-1]); #endif } sym = (type[1] == 'S' || type[1] == 's'); if ( sym ) { FormFullA(*ncol, nonz, nzval, rowind, colptr); } fclose(fp); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Exit dreadhb_dist()"); #endif } /* Eat up the rest of the current line */ static int DumpLine(FILE *fp) { register int c; while ((c = fgetc(fp)) != '\n') ; return 0; } static int ParseIntFormat(char *buf, int_t *num, int_t *size) { char *tmp; tmp = buf; while (*tmp++ != '(') ; *num = atoi(tmp); while (*tmp != 'I' && *tmp != 'i') ++tmp; ++tmp; *size = atoi(tmp); return 0; } static int ParseFloatFormat(char *buf, int_t *num, int_t *size) { char *tmp, *period; tmp = buf; while (*tmp++ != '(') ; *num = atoi(tmp); while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd' && *tmp != 'F' && *tmp != 'f') { /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the num picked up refers to P, which should be skipped. */ if (*tmp=='p' || *tmp=='P') { ++tmp; *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ } else { ++tmp; } } ++tmp; period = tmp; while (*period != '.' && *period != ')') ++period ; *period = '\0'; *size = atoi(tmp); return 0; } static void ReadVector(FILE *fp, int_t n, int_t *where, int_t perline, int_t persize) { register int_t i, j, item; char tmp, buf[100]; i = 0; while (i < n) { fgets(buf, 100, fp); /* read a line at a time */ for (j=0; j * On input, nonz/nzval/rowind/colptr represents lower part of a symmetric * matrix. On exit, it represents the full matrix with lower and upper parts. *
*/ extern void FormFullA(int_t n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { register int_t i, j, k, col, new_nnz; int_t *t_rowind, *t_colptr, *al_rowind, *al_colptr, *a_rowind, *a_colptr; int_t *marker; double *t_val, *al_val, *a_val; al_rowind = *rowind; al_colptr = *colptr; al_val = *nzval; if ( !(marker =(int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for marker[]"); if ( !(t_colptr = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC t_colptr[]"); if ( !(t_rowind = (int_t *) SUPERLU_MALLOC( *nonz * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for t_rowind[]"); if ( !(t_val = (double*) SUPERLU_MALLOC( *nonz * sizeof(double)) ) ) ABORT("SUPERLU_MALLOC fails for t_val[]"); /* Get counts of each column of T, and set up column pointers */ for (i = 0; i < n; ++i) marker[i] = 0; for (j = 0; j < n; ++j) { for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) ++marker[al_rowind[i]]; } t_colptr[0] = 0; for (i = 0; i < n; ++i) { t_colptr[i+1] = t_colptr[i] + marker[i]; marker[i] = t_colptr[i]; } /* Transpose matrix A to T */ for (j = 0; j < n; ++j) for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) { col = al_rowind[i]; t_rowind[marker[col]] = j; t_val[marker[col]] = al_val[i]; ++marker[col]; } new_nnz = *nonz * 2 - n; if ( !(a_colptr = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC a_colptr[]"); if ( !(a_rowind = (int_t *) SUPERLU_MALLOC( new_nnz * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for a_rowind[]"); if ( !(a_val = (double*) SUPERLU_MALLOC( new_nnz * sizeof(double)) ) ) ABORT("SUPERLU_MALLOC fails for a_val[]"); a_colptr[0] = 0; k = 0; for (j = 0; j < n; ++j) { for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) { if ( t_rowind[i] != j ) { /* not diagonal */ a_rowind[k] = t_rowind[i]; a_val[k] = t_val[i]; #if (DEBUGlevel >= 2) if ( fabs(a_val[k]) < 4.047e-300 ) printf("%5d: %e\n", k, a_val[k]); #endif ++k; } } for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) { 
a_rowind[k] = al_rowind[i]; a_val[k] = al_val[i]; #if (DEBUGlevel >= 2) if ( fabs(a_val[k]) < 4.047e-300 ) printf("%5d: %e\n", k, a_val[k]); #endif ++k; } a_colptr[j+1] = k; } printf("FormFullA: new_nnz = " IFMT ", k = " IFMT "\n", new_nnz, k); SUPERLU_FREE(al_val); SUPERLU_FREE(al_rowind); SUPERLU_FREE(al_colptr); SUPERLU_FREE(marker); SUPERLU_FREE(t_val); SUPERLU_FREE(t_rowind); SUPERLU_FREE(t_colptr); *nzval = a_val; *rowind = a_rowind; *colptr = a_colptr; *nonz = new_nnz; } SuperLU_DIST_5.3.0/SRC/dreadrb.c0000644013363400111340000002415213233431301015036 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file dreadrb.c * \brief Read a matrix stored in Rutherford-Boeing format * *
 * -- Distributed SuperLU routine (version 4.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * August 15, 2014
 *
 * 
* * Purpose * ======= * * Read a DOUBLE PRECISION matrix stored in Rutherford-Boeing format * as described below. * * Line 1 (A72, A8) * Col. 1 - 72 Title (TITLE) * Col. 73 - 80 Matrix name / identifier (MTRXID) * * Line 2 (I14, 3(1X, I13)) * Col. 1 - 14 Total number of lines excluding header (TOTCRD) * Col. 16 - 28 Number of lines for pointers (PTRCRD) * Col. 30 - 42 Number of lines for row (or variable) indices (INDCRD) * Col. 44 - 56 Number of lines for numerical values (VALCRD) * * Line 3 (A3, 11X, 4(1X, I13)) * Col. 1 - 3 Matrix type (see below) (MXTYPE) * Col. 15 - 28 Compressed Column: Number of rows (NROW) * Elemental: Largest integer used to index variable (MVAR) * Col. 30 - 42 Compressed Column: Number of columns (NCOL) * Elemental: Number of element matrices (NELT) * Col. 44 - 56 Compressed Column: Number of entries (NNZERO) * Elemental: Number of variable indeces (NVARIX) * Col. 58 - 70 Compressed Column: Unused, explicitly zero * Elemental: Number of elemental matrix entries (NELTVL) * * Line 4 (2A16, A20) * Col. 1 - 16 Fortran format for pointers (PTRFMT) * Col. 17 - 32 Fortran format for row (or variable) indices (INDFMT) * Col. 33 - 52 Fortran format for numerical values of coefficient matrix * (VALFMT) * (blank in the case of matrix patterns) * * The three character type field on line 3 describes the matrix type. * The following table lists the permitted values for each of the three * characters. As an example of the type field, RSA denotes that the matrix * is real, symmetric, and assembled. * * First Character: * R Real matrix * C Complex matrix * I integer matrix * P Pattern only (no numerical values supplied) * Q Pattern only (numerical values supplied in associated auxiliary value * file) * * Second Character: * S Symmetric * U Unsymmetric * H Hermitian * Z Skew symmetric * R Rectangular * * Third Character: * A Compressed column form * E Elemental form * * */ #include #include #include "superlu_ddefs.h" /*! 
\brief Eat up the rest of the current line
 *
 * Reads and discards characters from fp up to and including the next
 * newline.  The scan also stops at end-of-file or on a read error, so a
 * final line without a trailing newline cannot make the reader loop forever.
 *
 * Always returns 0.
 */
static int DumpLine(FILE *fp)
{
    int c;

    do {
        c = fgetc(fp);
    } while (c != '\n' && c != EOF);
    return 0;
}

/*
 * Parse a Fortran integer edit descriptor such as "(8I10)".
 *
 *   buf   (input)  Text of the format field.
 *   num   (output) Value converted by atoi() from the text following '('.
 *   size  (output) Value converted by atoi() from the text following the
 *                  first 'I' or 'i' after that point.
 *
 * Scanning stops at a NUL byte, so a field that lacks '(' or 'I' yields 0
 * for the corresponding value instead of reading past the terminator.
 * Always returns 0.
 */
static int ParseIntFormat(char *buf, int_t *num, int_t *size)
{
    char *tmp = buf;

    while (*tmp != '\0' && *tmp != '(') ++tmp;
    if (*tmp == '(') ++tmp;            /* step over the parenthesis */
    *num = atoi(tmp);

    while (*tmp != '\0' && *tmp != 'I' && *tmp != 'i') ++tmp;
    if (*tmp != '\0') ++tmp;           /* step over the 'I' */
    *size = atoi(tmp);
    return 0;
}

/*
 * Parse a Fortran real edit descriptor such as "(4E20.12)" or "(1P6F13.6)".
 *
 *   buf   (in/out) Text of the format field.  It is modified: the first '.'
 *                  or ')' after the E/D/F letter is overwritten with NUL.
 *   num   (output) Repeat count in front of the E/D/F letter.
 *   size  (output) Value converted by atoi() from the text between the
 *                  E/D/F letter and the following '.' or ')'.
 *
 * Scanning stops at a NUL byte, so a malformed field cannot walk past the
 * terminator.  Always returns 0.
 */
static int ParseFloatFormat(char *buf, int_t *num, int_t *size)
{
    char *tmp, *period;

    tmp = buf;
    while (*tmp != '\0' && *tmp != '(') ++tmp;
    if (*tmp == '(') ++tmp;            /* step over the parenthesis */
    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/

    while (*tmp != '\0' && *tmp != 'E' && *tmp != 'e' && *tmp != 'D'
           && *tmp != 'd' && *tmp != 'F' && *tmp != 'f') {
        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
           num picked up refers to P, which should be skipped. */
        if (*tmp == 'p' || *tmp == 'P') {
            ++tmp;
            *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
        } else {
            ++tmp;
        }
    }
    if (*tmp != '\0') ++tmp;           /* step over the E/D/F letter */

    period = tmp;
    while (*period != '\0' && *period != '.' && *period != ')') ++period;
    *period = '\0';                    /* isolate the width digits */
    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/

    return 0;
}

/* NOTE(review): the remainder of this line is reproduced unchanged.  It
   appears to be truncated: the text breaks off inside ReadVector() and runs
   straight into the description of FormFullA().  Restore it from a pristine
   copy of dreadrb.c. */
static int ReadVector(FILE *fp, int_t n, int_t *where, int_t perline, int_t persize) { register int_t i, j, item; char tmp, buf[100]; i = 0; while (i < n) { fgets(buf, 100, fp); /* read a line at a time */ for (j=0; j * On input, nonz/nzval/rowind/colptr represents lower part of a symmetric * matrix. On exit, it represents the full matrix with lower and upper parts.
* */ static void FormFullA(int_t n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { register int_t i, j, k, col, new_nnz; int_t *t_rowind, *t_colptr, *al_rowind, *al_colptr, *a_rowind, *a_colptr; int_t *marker; double *t_val, *al_val, *a_val; al_rowind = *rowind; al_colptr = *colptr; al_val = *nzval; if ( !(marker = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for marker[]"); if ( !(t_colptr = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC t_colptr[]"); if ( !(t_rowind = (int_t *) SUPERLU_MALLOC( *nonz * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for t_rowind[]"); if ( !(t_val = (double*) SUPERLU_MALLOC( *nonz * sizeof(double)) ) ) ABORT("SUPERLU_MALLOC fails for t_val[]"); /* Get counts of each column of T, and set up column pointers */ for (i = 0; i < n; ++i) marker[i] = 0; for (j = 0; j < n; ++j) { for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) ++marker[al_rowind[i]]; } t_colptr[0] = 0; for (i = 0; i < n; ++i) { t_colptr[i+1] = t_colptr[i] + marker[i]; marker[i] = t_colptr[i]; } /* Transpose matrix A to T */ for (j = 0; j < n; ++j) for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) { col = al_rowind[i]; t_rowind[marker[col]] = j; t_val[marker[col]] = al_val[i]; ++marker[col]; } new_nnz = *nonz * 2 - n; if ( !(a_colptr = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC a_colptr[]"); if ( !(a_rowind = (int_t *) SUPERLU_MALLOC( new_nnz * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for a_rowind[]"); if ( !(a_val = (double*) SUPERLU_MALLOC( new_nnz * sizeof(double)) ) ) ABORT("SUPERLU_MALLOC fails for a_val[]"); a_colptr[0] = 0; k = 0; for (j = 0; j < n; ++j) { for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) { if ( t_rowind[i] != j ) { /* not diagonal */ a_rowind[k] = t_rowind[i]; a_val[k] = t_val[i]; ++k; } } for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) { a_rowind[k] = al_rowind[i]; a_val[k] = al_val[i]; ++k; } a_colptr[j+1] = k; } printf("FormFullA: 
new_nnz = " IFMT ", k = " IFMT "\n", new_nnz, k); SUPERLU_FREE(al_val); SUPERLU_FREE(al_rowind); SUPERLU_FREE(al_colptr); SUPERLU_FREE(marker); SUPERLU_FREE(t_val); SUPERLU_FREE(t_rowind); SUPERLU_FREE(t_colptr); *nzval = a_val; *rowind = a_rowind; *colptr = a_colptr; *nonz = new_nnz; } void dreadrb_dist(int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { register int_t i, numer_lines = 0; int_t tmp, colnum, colsize, rownum, rowsize, valnum, valsize; char buf[100], type[4]; int sym; /* Line 1 */ fgets(buf, 100, fp); fputs(buf, stdout); /* Line 2 */ for (i=0; i<4; i++) { fscanf(fp, "%14c", buf); buf[14] = 0; tmp = atoi(buf); /*sscanf(buf, "%d", &tmp);*/ if (i == 3) numer_lines = tmp; } DumpLine(fp); /* Line 3 */ fscanf(fp, "%3c", type); fscanf(fp, "%11c", buf); /* pad */ type[3] = 0; #if (DEBUGlevel >= 1) if ( !iam ) printf("Matrix type %s\n", type); #endif fscanf(fp, "%14c", buf); *nrow = atoi(buf); fscanf(fp, "%14c", buf); *ncol = atoi(buf); fscanf(fp, "%14c", buf); *nonz = atoi(buf); fscanf(fp, "%14c", buf); tmp = atoi(buf); if (tmp != 0) if ( !iam ) printf("This is not an assembled matrix!\n"); if (*nrow != *ncol) if ( !iam ) printf("Matrix is not square.\n"); DumpLine(fp); /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */ dallocateA_dist(*ncol, *nonz, nzval, rowind, colptr); /* Line 4: format statement */ fscanf(fp, "%16c", buf); ParseIntFormat(buf, &colnum, &colsize); fscanf(fp, "%16c", buf); ParseIntFormat(buf, &rownum, &rowsize); fscanf(fp, "%20c", buf); ParseFloatFormat(buf, &valnum, &valsize); DumpLine(fp); #if (DEBUGlevel >= 1) if ( !iam ) { printf(IFMT " rows, " IFMT " nonzeros\n", *nrow, *nonz); printf("colnum " IFMT ", colsize " IFMT "\n", colnum, colsize); printf("rownum " IFMT ", rowsize " IFMT "\n", rownum, rowsize); printf("valnum " IFMT ", valsize " IFMT "\n", valnum, valsize); } #endif ReadVector(fp, *ncol+1, *colptr, colnum, colsize); ReadVector(fp, *nonz, *rowind, 
rownum, rowsize); if ( numer_lines ) { dReadValues(fp, *nonz, *nzval, valnum, valsize); } sym = (type[1] == 'S' || type[1] == 's'); if ( sym ) { FormFullA(*ncol, nonz, nzval, rowind, colptr); } fclose(fp); } SuperLU_DIST_5.3.0/SRC/pdgsrfs.c0000644013363400111340000001777213233431301015115 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Improves the computed solution to a system of linear equations and provides error bounds and backward error estimates * *
 * -- Distributed SuperLU routine (version 4.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 * Last modified:
 * December 31, 2015
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * PDGSRFS improves the computed solution to a system of linear   
 * equations and provides error bounds and backward error estimates
 * for the solution. 
 *
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * A      (input) SuperMatrix*
 *        The original matrix A, or the scaled A if equilibration was done.
 *        A is also permuted into diag(R)*A*diag(C)*Pc'. The type of A can be:
 *        Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pdgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *
 * ScalePermstruct (input) ScalePermstruct_t* (global)
 *         The data structure to store the scaling and permutation vectors
 *         describing the transformations performed to the matrix A.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input) double* (local)
 *        The m_loc-by-NRHS right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *       
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * X      (input/output) double* (local)
 *        On entry, the solution matrix Y, as computed by PDGSTRS, of the
 *            transformed system A1*Y = Pc*Pr*B, where
 *            A1 = Pc*Pr*diag(R)*A*diag(C)*Pc' and Y = Pc*diag(C)^(-1)*X.
 *        On exit, the improved solution matrix Y.
 *
 *        In order to obtain the solution X to the original system,
 *        Y should be permuted by Pc^T, and premultiplied by diag(C)
 *        if DiagScale = COL or BOTH.
 *        This must be done after this routine is called.
 *
 * ldx    (input) int (local)
 *        Leading dimension of matrix X.
 *
 * nrhs   (input) int
 *        Number of right-hand sides.
 *
 * SOLVEstruct (output) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * berr   (output) double*, dimension (nrhs)
 *         The componentwise relative backward error of each solution   
 *         vector X(j) (i.e., the smallest relative change in   
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the refinement steps.
 *        See util.h for the definition of SuperLUStat_t.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        
 * Internal Parameters   
 * ===================   
 *
 * ITMAX is the maximum number of steps of iterative refinement.   
 * 
*/ void pdgsrfs(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct, ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid, double *B, int_t ldb, double *X, int_t ldx, int nrhs, SOLVEstruct_t *SOLVEstruct, double *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 20 Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; double *ax, *R, *dx, *temp, *work, *B_col, *X_col; int_t count, i, j, lwork, nz; int iam; double eps, lstres; double s, safmin, safe1, safe2; /* Data structures used by matrix-vector multiply routine. */ pdgsmv_comm_t *gsmv_comm = SOLVEstruct->gsmv_comm; NRformat_loc *Astore; int_t m_loc, fst_row; /* Initialization. */ Astore = (NRformat_loc *) A->Store; m_loc = Astore->m_loc; fst_row = Astore->fst_row; iam = grid->iam; /* Test the input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < SUPERLU_MAX(0, m_loc) ) *info = -10; else if ( ldx < SUPERLU_MAX(0, m_loc) ) *info = -12; else if ( nrhs < 0 ) *info = -13; if (*info != 0) { i = -(*info); pxerr_dist("PDGSRFS", grid, i); return; } /* Quick return if possible. */ if ( n == 0 || nrhs == 0 ) { return; } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgsrfs()"); #endif lwork = 2 * m_loc; /* For ax/R/dx and temp */ if ( !(work = doubleMalloc_dist(lwork)) ) ABORT("Malloc fails for work[]"); ax = R = dx = work; temp = ax + m_loc; /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = dmach_dist("Epsilon"); safmin = dmach_dist("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; #if ( DEBUGlevel>=1 ) if ( !iam ) printf(".. eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n", eps, anorm, safe1, safe2); #endif /* Do for each right-hand side ... 
*/ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; B_col = &B[j*ldb]; X_col = &X[j*ldx]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ /* Matrix-vector multiply. */ pdgsmv(0, A, grid, gsmv_comm, X_col, ax); /* Compute residual, stored in R[]. */ for (i = 0; i < m_loc; ++i) R[i] = B_col[i] - ax[i]; /* Compute abs(op(A))*abs(X) + abs(B), stored in temp[]. */ pdgsmv(1, A, grid, gsmv_comm, X_col, temp); for (i = 0; i < m_loc; ++i) temp[i] += fabs(B_col[i]); s = 0.0; for (i = 0; i < m_loc; ++i) { if ( temp[i] > safe2 ) { s = SUPERLU_MAX(s, fabs(R[i]) / temp[i]); } else if ( temp[i] != 0.0 ) { /* Adding SAFE1 to the numerator guards against spuriously zero residuals (underflow). */ s = SUPERLU_MAX(s, (safe1 + fabs(R[i])) /temp[i]); } /* If temp[i] is exactly 0.0 (computed by PxGSMV), then we know the true residual also must be exactly 0.0. */ } MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm ); #if ( PRNTlevel>= 1 ) if ( !iam ) printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]); #endif if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) { /* Compute new dx. */ pdgstrs(n, LUstruct, ScalePermstruct, grid, dx, m_loc, fst_row, m_loc, 1, SOLVEstruct, stat, info); /* Update solution. */ for (i = 0; i < m_loc; ++i) X_col[i] += dx[i]; lstres = berr[j]; ++count; } else { break; } } /* end while */ stat->RefineSteps = count; } /* for j ... */ /* Deallocate storage. 
*/ SUPERLU_FREE(work); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgsrfs()"); #endif } /* PDGSRFS */ SuperLU_DIST_5.3.0/SRC/CMakeLists.txt0000644013363400111340000000661613233431301016034 0ustar xiaoyessgset(headers Cnames.h cublas_utils.h dcomplex.h machines.h psymbfact.h superlu_defs.h superlu_enum_consts.h supermatrix.h util_dist.h colamd.h ${CMAKE_CURRENT_BINARY_DIR}/superlu_dist_config.h ) if (MSVC) list(APPEND headers wingetopt.h) endif () # first: precision-independent files set(sources sp_ienv.c etree.c sp_colorder.c get_perm_c.c mmd.c comm.c memory.c util.c superlu_grid.c pxerr_dist.c superlu_timer.c symbfact.c psymbfact.c psymbfact_util.c get_perm_c_parmetis.c mc64ad_dist.c static_schedule.c xerr_dist.c smach_dist.c dmach_dist.c colamd.c superlu_dist_version.c ) if (MSVC) list(APPEND sources wingetopt.c) endif () set_source_files_properties(superlu_timer.c PROPERTIES COMPILE_FLAGS -O0) if(enable_double) list(APPEND headers superlu_ddefs.h) list(APPEND sources dlangs_dist.c dgsequ_dist.c dlaqgs_dist.c dutil_dist.c dmemory_dist.c dmyblas2_dist.c dsp_blas2_dist.c dsp_blas3_dist.c pdgssvx.c pdgssvx_ABglobal.c dreadhb.c dreadrb.c dreadtriple.c dreadMM.c pdgsequ.c pdlaqgs.c dldperm_dist.c pdlangs.c pdutil.c pdsymbfact_distdata.c ddistribute.c pddistribute.c pdgstrf.c pdgstrf2.c pdgstrs.c pdgstrs1.c pdgstrs_lsum.c pdgstrs_Bglobal.c pdgsrfs.c pdgsmv.c pdgsrfs_ABXglobal.c pdgsmv_AXglobal.c pdGetDiagU.c ) endif() if(enable_complex16) list(APPEND headers superlu_zdefs.h) list(APPEND sources dcomplex_dist.c zlangs_dist.c zgsequ_dist.c zlaqgs_dist.c zutil_dist.c zmemory_dist.c zmyblas2_dist.c zsp_blas2_dist.c zsp_blas3_dist.c pzgssvx.c pzgssvx_ABglobal.c zreadhb.c zreadrb.c zreadtriple.c zreadMM.c pzgsequ.c pzlaqgs.c zldperm_dist.c pzlangs.c pzutil.c pzsymbfact_distdata.c zdistribute.c pzdistribute.c pzgstrf.c pzgstrf2.c pzgstrs.c pzgstrs1.c pzgstrs_lsum.c pzgstrs_Bglobal.c pzgsrfs.c pzgsmv.c pzgsrfs_ABXglobal.c pzgsmv_AXglobal.c pzGetDiagU.c ) endif() 
add_library(superlu_dist ${sources} ${HEADERS}) set(targets superlu_dist) if (BUILD_SHARED_LIBS AND BUILD_STATIC_LIBS) # build both shared and static libs add_library(superlu_dist-static STATIC ${sources} ${HEADERS}) list(APPEND targets superlu_dist-static) endif() set(superlu_dist_libs ${MPI_C_LIBRARIES} ${BLAS_LIB} ${PARMETIS_LIB}) if (NOT MSVC) list(APPEND superlu_dist_libs m) endif () foreach(target ${targets}) target_link_libraries(${target} ${superlu_dist_libs}) set_target_properties(${target} PROPERTIES OUTPUT_NAME superlu_dist VERSION ${PROJECT_VERSION} SOVERSION ${VERSION_MAJOR} ) endforeach(target) target_compile_definitions(superlu_dist PRIVATE SUPERLU_DIST_EXPORTS) if(MSVC AND BUILD_SHARED_LIBS) set_target_properties(superlu_dist PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON ) endif() # Define GNU standard installation directories include(GNUInstallDirs) install(TARGETS ${targets} # DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION "${INSTALL_BIN_DIR}" LIBRARY DESTINATION "${INSTALL_LIB_DIR}" ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" ) install(FILES ${headers} # DESTINATION ${CMAKE_INSTALL_PREFIX}/include) DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) SuperLU_DIST_5.3.0/SRC/pdgssvx.c0000644013363400111340000016011213233431301015126 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of linear equations A*X=B * *
 * -- Distributed SuperLU routine (version 5.1.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * November 1, 2007
 * October 22, 2012
 * October  1, 2014
 * April 5, 2015
 * December 31, 2015  version 4.3
 * December 31, 2016  version 5.1.3
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * PDGSSVX solves a system of linear equations A*X=B,
 * by using Gaussian elimination with "static pivoting" to
 * compute the LU factorization of A.
 *
 * Static pivoting is a technique that combines the numerical stability
 * of partial pivoting with the scalability of Cholesky (no pivoting),
 * to run accurately and efficiently on large numbers of processors.
 * See our paper at http://www.nersc.gov/~xiaoye/SuperLU/ for a detailed
 * description of the parallel algorithms.
 *
 * The input matrices A and B are distributed by block rows.
 * Here is a graphical illustration (0-based indexing):
 *
 *                        A                B
 *               0 ---------------       ------
 *                   |           |        |  |
 *                   |           |   P0   |  |
 *                   |           |        |  |
 *                 ---------------       ------
 *        - fst_row->|           |        |  |
 *        |          |           |        |  |
 *       m_loc       |           |   P1   |  |
 *        |          |           |        |  |
 *        -          |           |        |  |
 *                 ---------------       ------
 *                   |    .      |        |. |
 *                   |    .      |        |. |
 *                   |    .      |        |. |
 *                 ---------------       ------
 * 
 * where, fst_row is the row number of the first row,
 *        m_loc is the number of rows local to this processor
 * These are defined in the 'SuperMatrix' structure, see supermatrix.h.
 *
 *
 * Here are the options for using this code:
 *
 *   1. Independent of all the other options specified below, the
 *      user must supply
 *
 *      -  B, the matrix of right-hand sides, distributed by block rows,
 *            and its dimensions ldb (local) and nrhs (global)
 *      -  grid, a structure describing the 2D processor mesh
 *      -  options->IterRefine, which determines whether or not to
 *            improve the accuracy of the computed solution using 
 *            iterative refinement
 *
 *      On output, B is overwritten with the solution X.
 *
 *   2. Depending on options->Fact, the user has four options
 *      for solving A*X=B. The standard option is for factoring
 *      A "from scratch". (The other options, described below,
 *      are used when A is sufficiently similar to a previously 
 *      solved problem to save time by reusing part or all of 
 *      the previous factorization.)
 *
 *      -  options->Fact = DOFACT: A is factored "from scratch"
 *
 *      In this case the user must also supply
 *
 *        o  A, the input matrix
 *
 *        as well as the following options to determine what matrix to
 *        factorize.
 *
 *        o  options->Equil,   to specify how to scale the rows and columns
 *                             of A to "equilibrate" it (to try to reduce its
 *                             condition number and so improve the
 *                             accuracy of the computed solution)
 *
 *        o  options->RowPerm, to specify how to permute the rows of A
 *                             (typically to control numerical stability)
 *
 *        o  options->ColPerm, to specify how to permute the columns of A
 *                             (typically to control fill-in and enhance
 *                             parallelism during factorization)
 *
 *        o  options->ReplaceTinyPivot, to specify how to deal with tiny
 *                             pivots encountered during factorization
 *                             (to control numerical stability)
 *
 *      The outputs returned include
 *         
 *        o  ScalePermstruct,  modified to describe how the input matrix A
 *                             was equilibrated and permuted:
 *          .  ScalePermstruct->DiagScale, indicates whether the rows and/or
 *                                         columns of A were scaled
 *          .  ScalePermstruct->R, array of row scale factors
 *          .  ScalePermstruct->C, array of column scale factors
 *          .  ScalePermstruct->perm_r, row permutation vector
 *          .  ScalePermstruct->perm_c, column permutation vector
 *
 *          (part of ScalePermstruct may also need to be supplied on input,
 *           depending on options->RowPerm and options->ColPerm as described 
 *           later).
 *
 *        o  A, the input matrix A overwritten by the scaled and permuted
 *              matrix diag(R)*A*diag(C)*Pc^T, where 
 *              Pc is the row permutation matrix determined by
 *                  ScalePermstruct->perm_c
 *              diag(R) and diag(C) are diagonal scaling matrices determined
 *                  by ScalePermstruct->DiagScale, ScalePermstruct->R and 
 *                  ScalePermstruct->C
 *
 *        o  LUstruct, which contains the L and U factorization of A1 where
 *
 *                A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 *               (Note that A1 = Pc*Pr*Aout, where Aout is the matrix stored
 *                in A on output.)
 *
 *   3. The second value of options->Fact assumes that a matrix with the same
 *      sparsity pattern as A has already been factored:
 *     
 *      -  options->Fact = SamePattern: A is factored, assuming that it has
 *            the same nonzero pattern as a previously factored matrix. In
 *            this case the algorithm saves time by reusing the previously
 *            computed column permutation vector stored in
 *            ScalePermstruct->perm_c and the "elimination tree" of A
 *            stored in LUstruct->etree
 *
 *      In this case the user must still specify the following options
 *      as before:
 *
 *        o  options->Equil
 *        o  options->RowPerm
 *        o  options->ReplaceTinyPivot
 *
 *      but not options->ColPerm, whose value is ignored. This is because the
 *      previous column permutation from ScalePermstruct->perm_c is used as
 *      input. The user must also supply 
 *
 *        o  A, the input matrix
 *        o  ScalePermstruct->perm_c, the column permutation
 *        o  LUstruct->etree, the elimination tree
 *
 *      The outputs returned include
 *         
 *        o  A, the input matrix A overwritten by the scaled and permuted
 *              matrix as described above
 *        o  ScalePermstruct, modified to describe how the input matrix A was
 *                            equilibrated and row permuted
 *        o  LUstruct, modified to contain the new L and U factors
 *
 *   4. The third value of options->Fact assumes that a matrix B with the same
 *      sparsity pattern as A has already been factored, and where the
 *      row permutation of B can be reused for A. This is useful when A and B
 *      have similar numerical values, so that the same row permutation
 *      will make both factorizations numerically stable. This lets us reuse
 *      all of the previously computed structure of L and U.
 *
 *      -  options->Fact = SamePattern_SameRowPerm: A is factored,
 *            assuming not only the same nonzero pattern as the previously
 *            factored matrix B, but reusing B's row permutation.
 *
 *      In this case the user must still specify the following options
 *      as before:
 *
 *        o  options->Equil
 *        o  options->ReplaceTinyPivot
 *
 *      but not options->RowPerm or options->ColPerm, whose values are
 *      ignored. This is because the permutations from ScalePermstruct->perm_r
 *      and ScalePermstruct->perm_c are used as input.
 *
 *      The user must also supply 
 *
 *        o  A, the input matrix
 *        o  ScalePermstruct->DiagScale, how the previous matrix was row
 *                                       and/or column scaled
 *        o  ScalePermstruct->R, the row scalings of the previous matrix,
 *                               if any
 *        o  ScalePermstruct->C, the column scalings of the previous matrix, 
 *                               if any
 *        o  ScalePermstruct->perm_r, the row permutation of the previous
 *                                    matrix
 *        o  ScalePermstruct->perm_c, the column permutation of the previous 
 *                                    matrix
 *        o  all of LUstruct, the previously computed information about
 *                            L and U (the actual numerical values of L and U
 *                            stored in LUstruct->Llu are ignored)
 *
 *      The outputs returned include
 *         
 *        o  A, the input matrix A overwritten by the scaled and permuted
 *              matrix as described above
 *        o  ScalePermstruct,  modified to describe how the input matrix A was
 *                             equilibrated (thus ScalePermstruct->DiagScale,
 *                             R and C may be modified)
 *        o  LUstruct, modified to contain the new L and U factors
 *
 *   5. The fourth and last value of options->Fact assumes that A is
 *      identical to a matrix that has already been factored on a previous 
 *      call, and reuses its entire LU factorization
 *
 *      -  options->Fact = FACTORED: A is identical to a previously
 *            factorized matrix, so the entire previous factorization
 *            can be reused.
 *
 *      In this case all the other options mentioned above are ignored
 *      (options->Equil, options->RowPerm, options->ColPerm, 
 *       options->ReplaceTinyPivot)
 *
 *      The user must also supply 
 *
 *        o  A, the unfactored matrix, only in the case that iterative
 *              refinement is to be done (specifically A must be the output
 *              A from the previous call, so that it has been scaled and permuted)
 *        o  all of ScalePermstruct
 *        o  all of LUstruct, including the actual numerical values of
 *           L and U
 *
 *      all of which are unmodified on output.
 *         
 * Arguments
 * =========
 *
 * options (input) superlu_dist_options_t* (global)
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following fields should be defined for this structure:
 *         
 *         o Fact (fact_t)
 *           Specifies whether or not the factored form of the matrix
 *           A is supplied on entry, and if not, how the matrix A should
 *           be factorized based on the previous history.
 *
 *           = DOFACT: The matrix A will be factorized from scratch.
 *                 Inputs:  A
 *                          options->Equil, RowPerm, ColPerm, ReplaceTinyPivot
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          all of ScalePermstruct
 *                          all of LUstruct
 *
 *           = SamePattern: the matrix A will be factorized assuming
 *             that a factorization of a matrix with the same sparsity
 *             pattern was performed prior to this one. Therefore, this
 *             factorization will reuse column permutation vector 
 *             ScalePermstruct->perm_c and the elimination tree
 *             LUstruct->etree
 *                 Inputs:  A
 *                          options->Equil, RowPerm, ReplaceTinyPivot
 *                          ScalePermstruct->perm_c
 *                          LUstruct->etree
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          rest of ScalePermstruct (DiagScale, R, C, perm_r)
 *                          rest of LUstruct (GLU_persist, Llu)
 *
 *           = SamePattern_SameRowPerm: the matrix A will be factorized
 *             assuming that a factorization of a matrix with the same
 *             sparsity pattern and similar numerical values was performed
 *             prior to this one. Therefore, this factorization will reuse
 *             both the row and column scaling factors R and C, both the
 *             row and column permutation vectors perm_r and perm_c, and the
 *             distributed data structure set up from the previous symbolic
 *             factorization.
 *                 Inputs:  A
 *                          options->Equil, ReplaceTinyPivot
 *                          all of ScalePermstruct
 *                          all of LUstruct
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          modified LUstruct->Llu
 *           = FACTORED: the matrix A is already factored.
 *                 Inputs:  all of ScalePermstruct
 *                          all of LUstruct
 *
 *         o Equil (yes_no_t)
 *           Specifies whether to equilibrate the system.
 *           = NO:  no equilibration.
 *           = YES: scaling factors are computed to equilibrate the system:
 *                      diag(R)*A*diag(C)*inv(diag(C))*X = diag(R)*B.
 *                  Whether or not the system will be equilibrated depends
 *                  on the scaling of the matrix A, but if equilibration is
 *                  used, A is overwritten by diag(R)*A*diag(C) and B by
 *                  diag(R)*B.
 *
 *         o RowPerm (rowperm_t)
 *           Specifies how to permute rows of the matrix A.
 *           = NATURAL:   use the natural ordering.
 *           = LargeDiag: use the Duff/Koster algorithm to permute rows of
 *                        the original matrix to make the diagonal large
 *                        relative to the off-diagonal.
 *           = MY_PERMR:  use the ordering given in ScalePermstruct->perm_r
 *                        input by the user.
 *           
 *         o ColPerm (colperm_t)
 *           Specifies what type of column permutation to use to reduce fill.
 *           = NATURAL:       natural ordering.
 *           = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A.
 *           = MMD_ATA:       minimum degree ordering on structure of A'*A.
 *           = MY_PERMC:      the ordering given in ScalePermstruct->perm_c.
 *         
 *         o ReplaceTinyPivot (yes_no_t)
 *           = NO:  do not modify pivots
 *           = YES: replace tiny pivots by sqrt(epsilon)*norm(A) during 
 *                  LU factorization.
 *
 *         o IterRefine (IterRefine_t)
 *           Specifies how to perform iterative refinement.
 *           = NO:     no iterative refinement.
 *           = SLU_DOUBLE: accumulate residual in double precision.
 *           = SLU_EXTRA:  accumulate residual in extra precision.
 *
 *         NOTE: all options must be identical on all processes when
 *               calling this routine.
 *
 * A (input/output) SuperMatrix* (local)
 *         On entry, matrix A in A*X=B, of dimension (A->nrow, A->ncol).
 *           The number of linear equations is A->nrow. The type of A must be:
 *           Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
 *           That is, A is stored in distributed compressed row format.
 *           See supermatrix.h for the definition of 'SuperMatrix'.
 *           This routine only handles square A; however, the LU factorization
 *           routine PDGSTRF can factorize rectangular matrices.
 *         On exit, A may be overwritten by diag(R)*A*diag(C)*Pc^T,
 *           depending on ScalePermstruct->DiagScale and options->ColPerm:
 *             if ScalePermstruct->DiagScale != NOEQUIL, A is overwritten by
 *                diag(R)*A*diag(C).
 *             if options->ColPerm != NATURAL, A is further overwritten by
 *                diag(R)*A*diag(C)*Pc^T.
 *           If all the above conditions are true, the LU decomposition is
 *           performed on the matrix Pc*Pr*diag(R)*A*diag(C)*Pc^T.
 *
 * ScalePermstruct (input/output) ScalePermstruct_t* (global)
 *         The data structure to store the scaling and permutation vectors
 *         describing the transformations performed to the matrix A.
 *         It contains the following fields:
 *
 *         o DiagScale (DiagScale_t)
 *           Specifies the form of equilibration that was done.
 *           = NOEQUIL: no equilibration.
 *           = ROW:     row equilibration, i.e., A was premultiplied by
 *                      diag(R).
 *           = COL:     Column equilibration, i.e., A was postmultiplied
 *                      by diag(C).
 *           = BOTH:    both row and column equilibration, i.e., A was 
 *                      replaced by diag(R)*A*diag(C).
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm,
 *           DiagScale is an input argument; otherwise it is an output
 *           argument.
 *
 *         o perm_r (int*)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *           If options->RowPerm = MY_PERMR, or
 *           options->Fact = SamePattern_SameRowPerm, perm_r is an
 *           input argument; otherwise it is an output argument.
 *
 *         o perm_c (int*)
 *           Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *           If options->ColPerm = MY_PERMC or options->Fact = SamePattern
 *           or options->Fact = SamePattern_SameRowPerm, perm_c is an
 *           input argument; otherwise, it is an output argument.
 *           On exit, perm_c may be overwritten by the product of the input
 *           perm_c and a permutation that postorders the elimination tree
 *           of Pc*A'*A*Pc'; perm_c is not changed if the elimination tree
 *           is already in postorder.
 *
 *         o R (double*) dimension (A->nrow)
 *           The row scale factors for A.
 *           If DiagScale = ROW or BOTH, A is multiplied on the left by 
 *                          diag(R).
 *           If DiagScale = NOEQUIL or COL, R is not defined.
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm, R is
 *           an input argument; otherwise, R is an output argument.
 *
 *         o C (double*) dimension (A->ncol)
 *           The column scale factors for A.
 *           If DiagScale = COL or BOTH, A is multiplied on the right by 
 *                          diag(C).
 *           If DiagScale = NOEQUIL or ROW, C is not defined.
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm, C is
 *           an input argument; otherwise, C is an output argument.
 *         
 * B       (input/output) double* (local)
 *         On entry, the right-hand side matrix of dimension (m_loc, nrhs),
 *           where, m_loc is the number of rows stored locally on my
 *           process and is defined in the data structure of matrix A.
 *         On exit, the solution matrix if info = 0.
 *
 * ldb     (input) int (local)
 *         The leading dimension of matrix B.
 *
 * nrhs    (input) int (global)
 *         The number of right-hand sides.
 *         If nrhs = 0, only LU decomposition is performed, the forward
 *         and back substitutions are skipped.
 *
 * grid    (input) gridinfo_t* (global)
 *         The 2D process mesh. It contains the MPI communicator, the number
 *         of process rows (NPROW), the number of process columns (NPCOL),
 *         and my process rank. It is an input argument to all the
 *         parallel routines.
 *         Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *         See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         It contains the following fields:
 *
 *         o etree (int*) dimension (A->ncol) (global)
 *           Elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc'.
 *           It is computed in sp_colorder() during the first factorization,
 *           and is reused in the subsequent factorizations of the matrices
 *           with the same nonzero pattern.
 *           On exit of sp_colorder(), the columns of A are permuted so that
 *           the etree is in a certain postorder. This postorder is reflected
 *           in ScalePermstruct->perm_c.
 *           NOTE:
 *           Etree is a vector of parent pointers for a forest whose vertices
 *           are the integers 0 to A->ncol-1; etree[root]==A->ncol.
 *
 *         o Glu_persist (Glu_persist_t*) (global)
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *             xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (LocalLU_t*) (local)
 *           The distributed data structures to store L and U factors.
 *           See superlu_ddefs.h for the definition of 'LocalLU_t'.
 *
 * SOLVEstruct (input/output) SOLVEstruct_t*
 *         The data structure to hold the communication pattern used
 *         in the phases of triangular solution and iterative refinement.
 *         This pattern should be initialized only once for repeated solutions.
 *         If options->SolveInitialized = YES, it is an input argument.
 *         If options->SolveInitialized = NO and nrhs != 0, it is an output
 *         argument. See superlu_ddefs.h for the definition of 'SOLVEstruct_t'.
 *
 * berr    (output) double*, dimension (nrhs) (global)
 *         The componentwise relative backward error of each solution   
 *         vector X(j) (i.e., the smallest relative change in   
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info    (output) int*
 *         = 0: successful exit
 *         < 0: if info = -i, the i-th argument had an illegal value
 *         > 0: if info = i, and i is
 *             <= A->ncol: U(i,i) is exactly zero. The factorization has
 *                been completed, but the factor U is exactly singular,
 *                so the solution could not be computed.
 *             > A->ncol: number of bytes allocated when memory allocation
 *                failure occurred, plus A->ncol.
 *
 * See superlu_ddefs.h for the definitions of various data types.
 * 
*/ void pdgssvx(superlu_dist_options_t *options, SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, double B[], int ldb, int nrhs, gridinfo_t *grid, LUstruct_t *LUstruct, SOLVEstruct_t *SOLVEstruct, double *berr, SuperLUStat_t *stat, int *info) { NRformat_loc *Astore; SuperMatrix GA; /* Global A in NC format */ NCformat *GAstore; double *a_GA; SuperMatrix GAC; /* Global A in NCP format (add n end pointers) */ NCPformat *GACstore; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; Glu_freeable_t *Glu_freeable; /* The nonzero structures of L and U factors, which are replicated on all processrs. (lsub, xlsub) contains the compressed subscript of supernodes in L. (usub, xusub) contains the compressed subscript of nonzero segments in U. If options->Fact != SamePattern_SameRowPerm, they are computed by SYMBFACT routine, and then used by PDDISTRIBUTE routine. They will be freed after PDDISTRIBUTE routine. If options->Fact == SamePattern_SameRowPerm, these structures are not used. */ fact_t Fact; double *a; int_t *colptr, *rowind; int_t *perm_r; /* row permutations from partial pivoting */ int_t *perm_c; /* column permutation vector */ int_t *etree; /* elimination tree */ int_t *rowptr, *colind; /* Local A in NR*/ int_t colequ, Equil, factored, job, notran, rowequ, need_value; int_t i, iinfo, j, irow, m, n, nnz, permc_spec; int_t nnz_loc, m_loc, fst_row, icol; int iam; int ldx; /* LDA for matrix X (local). 
*/ char equed[1], norm[1]; double *C, *R, *C1, *R1, amax, anorm, colcnd, rowcnd; double *X, *b_col, *b_work, *x_col; double t; float GA_mem_use; /* memory usage by global A */ float dist_mem_use; /* memory usage during distribution */ superlu_dist_mem_usage_t num_mem_usage, symb_mem_usage; #if ( PRNTlevel>= 2 ) double dmin, dsum, dprod; #endif /* Structures needed for parallel symbolic factorization */ int_t *sizes, *fstVtxSep, parSymbFact; int noDomains, nprocs_num; MPI_Comm symb_comm; /* communicator for symbolic factorization */ int col, key; /* parameters for creating a new communicator */ Pslu_freeable_t Pslu_freeable; float flinfo; /* Initialization. */ m = A->nrow; n = A->ncol; Astore = (NRformat_loc *) A->Store; nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; fst_row = Astore->fst_row; a = (double *) Astore->nzval; rowptr = Astore->rowptr; colind = Astore->colind; sizes = NULL; fstVtxSep = NULL; symb_comm = MPI_COMM_NULL; /* Test the input parameters. */ *info = 0; Fact = options->Fact; if ( Fact < 0 || Fact > FACTORED ) *info = -1; else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR ) *info = -1; else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC ) *info = -1; else if ( options->IterRefine < 0 || options->IterRefine > SLU_EXTRA ) *info = -1; else if ( options->IterRefine == SLU_EXTRA ) { *info = -1; printf("ERROR: Extra precise iterative refinement yet to support.\n"); } else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < m_loc ) *info = -5; else if ( nrhs < 0 ) *info = -6; if ( sp_ienv_dist(2) > sp_ienv_dist(3) ) { *info = 1; printf("ERROR: Relaxation (NREL) cannot be larger than max. 
supernode size (NSUP).\n" "\t-> Check parameter setting in sp_ienv_dist.c to correct error.\n"); } if ( *info ) { i = -(*info); pxerr_dist("pdgssvx", grid, -*info); return; } factored = (Fact == FACTORED); Equil = (!factored && options->Equil == YES); notran = (options->Trans == NOTRANS); parSymbFact = options->ParSymbFact; iam = grid->iam; job = 5; if ( factored || (Fact == SamePattern_SameRowPerm && Equil) ) { rowequ = (ScalePermstruct->DiagScale == ROW) || (ScalePermstruct->DiagScale == BOTH); colequ = (ScalePermstruct->DiagScale == COL) || (ScalePermstruct->DiagScale == BOTH); } else rowequ = colequ = FALSE; /* The following arrays are replicated on all processes. */ perm_r = ScalePermstruct->perm_r; perm_c = ScalePermstruct->perm_c; etree = LUstruct->etree; R = ScalePermstruct->R; C = ScalePermstruct->C; /********/ #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgssvx()"); #endif /* Not factored & ask for equilibration */ if ( Equil && Fact != SamePattern_SameRowPerm ) { /* Allocate storage if not done so before. */ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->R = R; ScalePermstruct->C = C; break; case ROW: if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->C = C; break; case COL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); ScalePermstruct->R = R; break; } } /* ------------------------------------------------------------ * Diagonal scaling to equilibrate the matrix. 
(simple scheme) * for row i = 1:n, A(i,:) <- A(i,:) / max(abs(A(i,:)); * for column j = 1:n, A(:,j) <- A(:, j) / max(abs(A(:,j)) * ------------------------------------------------------------*/ if ( Equil ) { #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter equil"); #endif t = SuperLU_timer_(); if ( Fact == SamePattern_SameRowPerm ) { /* Reuse R and C. */ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: break; case ROW: irow = fst_row; for (j = 0; j < m_loc; ++j) { for (i = rowptr[j]; i < rowptr[j+1]; ++i) { a[i] *= R[irow]; /* Scale rows. */ } ++irow; } break; case COL: for (j = 0; j < m_loc; ++j) for (i = rowptr[j]; i < rowptr[j+1]; ++i){ icol = colind[i]; a[i] *= C[icol]; /* Scale columns. */ } break; case BOTH: irow = fst_row; for (j = 0; j < m_loc; ++j) { for (i = rowptr[j]; i < rowptr[j+1]; ++i) { icol = colind[i]; a[i] *= R[irow] * C[icol]; /* Scale rows and cols. */ } ++irow; } break; } } else { /* Compute R & C from scratch */ /* Compute the row and column scalings. */ pdgsequ(A, R, C, &rowcnd, &colcnd, &amax, &iinfo, grid); if ( iinfo > 0 ) { if ( iinfo <= m ) { #if ( PRNTlevel>=1 ) fprintf(stderr, "The " IFMT "-th row of A is exactly zero\n", iinfo); #endif } else { #if ( PRNTlevel>=1 ) fprintf(stderr, "The " IFMT "-th column of A is exactly zero\n", iinfo-n); #endif } } else if ( iinfo < 0 ) return; /* Now iinfo == 0 */ /* Equilibrate matrix A if it is badly-scaled. A <-- diag(R)*A*diag(C) */ pdlaqgs(A, R, C, rowcnd, colcnd, amax, equed); if ( strncmp(equed, "R", 1)==0 ) { ScalePermstruct->DiagScale = ROW; rowequ = ROW; } else if ( strncmp(equed, "C", 1)==0 ) { ScalePermstruct->DiagScale = COL; colequ = COL; } else if ( strncmp(equed, "B", 1)==0 ) { ScalePermstruct->DiagScale = BOTH; rowequ = ROW; colequ = COL; } else ScalePermstruct->DiagScale = NOEQUIL; #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. equilibrated? *equed = %c\n", *equed); fflush(stdout); } #endif } /* end if Fact ... 
*/ stat->utime[EQUIL] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit equil"); #endif } /* end if Equil ... LAPACK style, not involving MC64 */ if ( !factored ) { /* Skip this if already factored. */ /* * For serial symbolic factorization, gather A from the distributed * compressed row format to global A in compressed column format. * Numerical values are gathered only when a row permutation * for large diagonal is sought after. */ if ( Fact != SamePattern_SameRowPerm && (parSymbFact == NO || options->RowPerm != NO) ) { /* Performs serial symbolic factorzation and/or MC64 */ need_value = (options->RowPerm == LargeDiag); pdCompRow_loc_to_CompCol_global(need_value, A, grid, &GA); GAstore = (NCformat *) GA.Store; colptr = GAstore->colptr; rowind = GAstore->rowind; nnz = GAstore->nnz; GA_mem_use = (nnz + n + 1) * sizeof(int_t); if ( need_value ) { a_GA = (double *) GAstore->nzval; GA_mem_use += nnz * sizeof(double); } else assert(GAstore->nzval == NULL); } /* ------------------------------------------------------------ Find the row permutation Pr for A, and apply Pr*[GA]. GA is overwritten by Pr*[GA]. ------------------------------------------------------------*/ if ( options->RowPerm != NO ) { t = SuperLU_timer_(); if ( Fact != SamePattern_SameRowPerm ) { if ( options->RowPerm == MY_PERMR ) { /* Use user's perm_r. */ /* Permute the global matrix GA for symbfact() */ for (i = 0; i < colptr[n]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } else { /* options->RowPerm == LargeDiag */ /* Get a new perm_r[] */ if ( job == 5 ) { /* Allocate storage for scaling factors. 
*/ if ( !(R1 = doubleMalloc_dist(m)) ) ABORT("SUPERLU_MALLOC fails for R1[]"); if ( !(C1 = doubleMalloc_dist(n)) ) ABORT("SUPERLU_MALLOC fails for C1[]"); } if ( !iam ) { /* Process 0 finds a row permutation */ iinfo = dldperm_dist(job, m, nnz, colptr, rowind, a_GA, perm_r, R1, C1); MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } } else { MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } } if ( iinfo && job == 5) { /* Error return */ SUPERLU_FREE(R1); SUPERLU_FREE(C1); } #if ( PRNTlevel>=2 ) dmin = dmach_dist("Overflow"); dsum = 0.0; dprod = 1.0; #endif if ( iinfo == 0 ) { if ( job == 5 ) { if ( Equil ) { for (i = 0; i < n; ++i) { R1[i] = exp(R1[i]); C1[i] = exp(C1[i]); } /* Scale the distributed matrix further. A <-- diag(R1)*A*diag(C1) */ irow = fst_row; for (j = 0; j < m_loc; ++j) { for (i = rowptr[j]; i < rowptr[j+1]; ++i) { icol = colind[i]; a[i] *= R1[irow] * C1[icol]; #if ( PRNTlevel>=2 ) if ( perm_r[irow] == icol ) { /* New diagonal */ if ( job == 2 || job == 3 ) dmin = SUPERLU_MIN(dmin, fabs(a[i])); else if ( job == 4 ) dsum += fabs(a[i]); else if ( job == 5 ) dprod *= fabs(a[i]); } #endif } ++irow; } /* Multiply together the scaling factors -- R/C from simple scheme, R1/C1 from MC64. 
*/ if ( rowequ ) for (i = 0; i < m; ++i) R[i] *= R1[i]; else for (i = 0; i < m; ++i) R[i] = R1[i]; if ( colequ ) for (i = 0; i < n; ++i) C[i] *= C1[i]; else for (i = 0; i < n; ++i) C[i] = C1[i]; ScalePermstruct->DiagScale = BOTH; rowequ = colequ = 1; } /* end Equil */ /* Now permute global GA to prepare for symbfact() */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } SUPERLU_FREE (R1); SUPERLU_FREE (C1); } else { /* job = 2,3,4 */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } /* end for i ... */ } /* end for j ... */ } /* end else job ... */ } else { /* if iinfo != 0 */ for (i = 0; i < m; ++i) perm_r[i] = i; } #if ( PRNTlevel>=2 ) if ( job == 2 || job == 3 ) { if ( !iam ) printf("\tsmallest diagonal %e\n", dmin); } else if ( job == 4 ) { if ( !iam ) printf("\tsum of diagonal %e\n", dsum); } else if ( job == 5 ) { if ( !iam ) printf("\t product of diagonal %e\n", dprod); } #endif } /* end if options->RowPerm ... */ t = SuperLU_timer_() - t; stat->utime[ROWPERM] = t; #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. LDPERM job " IFMT "\t time: %.2f\n", job, t); fflush(stdout); } #endif } /* end if Fact ... */ } else { /* options->RowPerm == NOROWPERM / NATURAL */ for (i = 0; i < m; ++i) perm_r[i] = i; } #if ( DEBUGlevel>=2 ) if ( !iam ) PrintInt10("perm_r", m, perm_r); #endif } /* end if (!factored) */ if ( !factored || options->IterRefine ) { /* Compute norm(A), which will be used to adjust small diagonal. */ if ( notran ) *(unsigned char *)norm = '1'; else *(unsigned char *)norm = 'I'; anorm = pdlangs(norm, A, grid); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. anorm %e\n", anorm); fflush(stdout); } #endif } /* ------------------------------------------------------------ Perform the LU factorization: symbolic factorization, redistribution, and numerical factorization. 
------------------------------------------------------------*/ if ( !factored ) { t = SuperLU_timer_(); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = METIS_AT_PLUS_A: METIS on structure of A'+A * permc_spec = PARMETIS: parallel METIS on structure of A'+A * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( parSymbFact == YES || permc_spec == PARMETIS ) { nprocs_num = grid->nprow * grid->npcol; noDomains = (int) ( pow(2, ((int) LOG2( nprocs_num )))); /* create a new communicator for the first noDomains processes in grid->comm */ key = iam; if (iam < noDomains) col = 0; else col = MPI_UNDEFINED; MPI_Comm_split (grid->comm, col, key, &symb_comm ); if ( permc_spec == NATURAL || permc_spec == MY_PERMC ) { if ( permc_spec == NATURAL ) { for (j = 0; j < n; ++j) perm_c[j] = j; } if ( !(sizes = intMalloc_dist(2 * noDomains)) ) ABORT("SUPERLU_MALLOC fails for sizes."); if ( !(fstVtxSep = intMalloc_dist(2 * noDomains)) ) ABORT("SUPERLU_MALLOC fails for fstVtxSep."); for (i = 0; i < 2*noDomains - 2; ++i) { sizes[i] = 0; fstVtxSep[i] = 0; } sizes[2*noDomains - 2] = m; fstVtxSep[2*noDomains - 2] = 0; } else if ( permc_spec != PARMETIS ) { /* same as before */ printf("{" IFMT "," IFMT "}: pdgssvx: invalid ColPerm option when ParSymbfact is used\n", MYROW(grid->iam, grid), MYCOL(grid->iam, grid)); } } if ( permc_spec != MY_PERMC && Fact == DOFACT ) { /* Reuse perm_c if Fact == SamePattern, or SamePattern_SameRowPerm */ if ( permc_spec == PARMETIS ) { /* Get column permutation vector in perm_c. * * This routine takes as input the distributed input matrix A * * and does not modify it. It also allocates memory for * * sizes[] and fstVtxSep[] arrays, that contain information * * on the separator tree computed by ParMETIS. 
*/ flinfo = get_perm_c_parmetis(A, perm_r, perm_c, nprocs_num, noDomains, &sizes, &fstVtxSep, grid, &symb_comm); if (flinfo > 0) { #if ( PRNTlevel>=1 ) fprintf(stderr, "Insufficient memory for get_perm_c parmetis\n"); #endif *info = flinfo; return; } } else { get_perm_c_dist(iam, permc_spec, &GA, perm_c); } } stat->utime[COLPERM] = SuperLU_timer_() - t; /* Compute the elimination tree of Pc*(A^T+A)*Pc^T or Pc*A^T*A*Pc^T (a.k.a. column etree), depending on the choice of ColPerm. Adjust perm_c[] to be consistent with a postorder of etree. Permute columns of A to form A*Pc'. */ if ( Fact != SamePattern_SameRowPerm ) { if ( parSymbFact == NO ) { /* Perform serial symbolic factorization */ /* GA = Pr*A, perm_r[] is already applied. */ int_t *GACcolbeg, *GACcolend, *GACrowind; /* After this routine, GAC = GA*Pc^T. */ sp_colorder(options, &GA, perm_c, etree, &GAC); /* Form Pc*A*Pc^T to preserve the diagonal of the matrix GAC. */ GACstore = (NCPformat *) GAC.Store; GACcolbeg = GACstore->colbeg; GACcolend = GACstore->colend; GACrowind = GACstore->rowind; for (j = 0; j < n; ++j) { for (i = GACcolbeg[j]; i < GACcolend[j]; ++i) { irow = GACrowind[i]; GACrowind[i] = perm_c[irow]; } } /* Perform a symbolic factorization on Pc*Pr*A*Pc^T and set up the nonzero data structures for L & U. */ #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. symbfact(): relax " IFMT ", maxsuper " IFMT ", fill " IFMT "\n", sp_ienv_dist(2), sp_ienv_dist(3), sp_ienv_dist(6)); fflush(stdout); } #endif t = SuperLU_timer_(); if ( !(Glu_freeable = (Glu_freeable_t *) SUPERLU_MALLOC(sizeof(Glu_freeable_t))) ) ABORT("Malloc fails for Glu_freeable."); /* Every process does this. 
*/ iinfo = symbfact(options, iam, &GAC, perm_c, etree, Glu_persist, Glu_freeable); stat->utime[SYMBFAC] = SuperLU_timer_() - t; if ( iinfo <= 0 ) { /* Successful return */ QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage); #if ( PRNTlevel>=1 ) if ( !iam ) { printf("\tNo of supers " IFMT "\n", (long long) Glu_persist->supno[n-1]+1); printf("\tSize of G(L) " IFMT "\n", (long long) Glu_freeable->xlsub[n]); printf("\tSize of G(U) " IFMT "\n", (long long) Glu_freeable->xusub[n]); printf("\tint %d, short %d, float %d, double %d\n", (int) sizeof(int_t), (int) sizeof(short), (int) sizeof(float), (int) sizeof(double)); printf("\tSYMBfact (MB):\tL\\U %.2f\ttotal %.2f\texpansions " IFMT "\n", symb_mem_usage.for_lu*1e-6, symb_mem_usage.total*1e-6, symb_mem_usage.expansions); fflush(stdout); } #endif } else { /* symbfact out of memory */ #if ( PRNTlevel>=1 ) if ( !iam ) fprintf(stderr,"symbfact() error returns " IFMT "\n",iinfo); #endif *info = iinfo; return; } } /* end serial symbolic factorization */ else { /* parallel symbolic factorization */ t = SuperLU_timer_(); flinfo = symbfact_dist(nprocs_num, noDomains, A, perm_c, perm_r, sizes, fstVtxSep, &Pslu_freeable, &(grid->comm), &symb_comm, &symb_mem_usage); stat->utime[SYMBFAC] = SuperLU_timer_() - t; if (flinfo > 0) { #if ( PRNTlevel>=1 ) fprintf(stderr, "Insufficient memory for parallel symbolic factorization."); #endif *info = flinfo; return; } } /* Destroy global GA */ if ( parSymbFact == NO || options->RowPerm != NO ) Destroy_CompCol_Matrix_dist(&GA); if ( parSymbFact == NO ) Destroy_CompCol_Permuted_dist(&GAC); } /* end if Fact ... 
*/ if (sizes) SUPERLU_FREE (sizes); if (fstVtxSep) SUPERLU_FREE (fstVtxSep); if (symb_comm != MPI_COMM_NULL) MPI_Comm_free (&symb_comm); if (parSymbFact == NO || Fact == SamePattern_SameRowPerm) { /* CASE OF SERIAL SYMBOLIC */ /* Apply column permutation to the original distributed A */ for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]]; /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc^T into L and U storage. NOTE: the row permutation Pc*Pr is applied internally in the distribution routine. */ t = SuperLU_timer_(); dist_mem_use = pddistribute(Fact, n, A, ScalePermstruct, Glu_freeable, LUstruct, grid); stat->utime[DIST] = SuperLU_timer_() - t; /* Deallocate storage used in symbolic factorization. */ if ( Fact != SamePattern_SameRowPerm ) { iinfo = symbfact_SubFree(Glu_freeable); SUPERLU_FREE(Glu_freeable); } } else { /* CASE OF PARALLEL SYMBOLIC */ /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc' into L and U storage. NOTE: the row permutation Pc*Pr is applied internally in the distribution routine. */ /* Apply column permutation to the original distributed A */ for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]]; t = SuperLU_timer_(); dist_mem_use = ddist_psymbtonum(Fact, n, A, ScalePermstruct, &Pslu_freeable, LUstruct, grid); if (dist_mem_use > 0) ABORT ("Not enough memory available for dist_psymbtonum\n"); stat->utime[DIST] = SuperLU_timer_() - t; } /*if (!iam) printf ("\tDISTRIBUTE time %8.2f\n", stat->utime[DIST]);*/ /* Perform numerical factorization in parallel. 
*/ t = SuperLU_timer_(); pdgstrf(options, m, n, anorm, LUstruct, grid, stat, info); stat->utime[FACT] = SuperLU_timer_() - t; #if 0 // #ifdef GPU_PROF // if(!iam ) // { // char* ttemp; // ttemp = getenv("IO_FILE"); // if(ttemp!=NULL) // { // printf("File being opend is %s\n",ttemp ); // FILE* fp; // fp = fopen(ttemp,"w"); // if(!fp) // { // fprintf(stderr," Couldn't open output file %s\n",ttemp); // } // int nsup=Glu_persist->supno[n-1]+1; // int ii; // for (ii = 0; ii < nsup; ++ii) // { // fprintf(fp,"%d,%d,%d,%d,%d,%d\n",gs1.mnk_min_stats[ii],gs1.mnk_min_stats[ii+nsup], // gs1.mnk_min_stats[ii+2*nsup], // gs1.mnk_max_stats[ii],gs1.mnk_max_stats[ii+nsup],gs1.mnk_max_stats[ii+2*nsup]); // } // // lastly put the timeing stats that we need // fprintf(fp,"Min %lf Max %lf totaltime %lf \n",gs1.osDgemmMin,gs1.osDgemmMax,stat->utime[FACT]); // fclose(fp); // } // } // #endif #endif if ( options->PrintStat ) { int_t TinyPivots; float for_lu, total, max, avg, temp; dQuerySpace_dist(n, LUstruct, grid, stat, &num_mem_usage); if (parSymbFact == TRUE) { /* The memory used in the redistribution routine includes the memory used for storing the symbolic structure and the memory allocated for numerical factorization */ temp = SUPERLU_MAX(symb_mem_usage.total, -dist_mem_use); if ( options->RowPerm != NO ) temp = SUPERLU_MAX(temp, GA_mem_use); } else { temp = SUPERLU_MAX ( symb_mem_usage.total + GA_mem_use, /* symbfact step */ symb_mem_usage.for_lu + dist_mem_use + num_mem_usage.for_lu /* distribution step */ ); } temp = SUPERLU_MAX(temp, num_mem_usage.total); MPI_Reduce( &temp, &max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &temp, &avg, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Allreduce( &stat->TinyPivots, &TinyPivots, 1, mpi_int_t, MPI_SUM, grid->comm ); stat->TinyPivots = TinyPivots; MPI_Reduce( &num_mem_usage.for_lu, &for_lu, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &num_mem_usage.total, &total, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); if (!iam) { 
printf("\n** Memory Usage **********************************\n"); printf("** NUMfact space (MB): (sum-of-all-processes)\n" " L\\U : %8.2f | Total : %8.2f\n", for_lu * 1e-6, total * 1e-6); printf("** Total highmark (MB):\n" " Sum-of-all : %8.2f | Avg : %8.2f | Max : %8.2f\n", avg * 1e-6, avg / grid->nprow / grid->npcol * 1e-6, max * 1e-6); printf("**************************************************\n"); fflush(stdout); } } /* end printing stats */ } /* end if (!factored) */ if ( options->Fact == DOFACT || options->Fact == SamePattern ) { /* Need to reset the solve's communication pattern, because perm_r[] and/or perm_c[] is changed. */ if ( options->SolveInitialized == YES ) { /* Initialized before */ dSolveFinalize(options, SOLVEstruct); /* Clean up structure */ options->SolveInitialized = NO; /* Reset the solve state */ } } #if 0 /* Need to revisit: Why the following is not good enough for X-to-B distribution -- inv_perm_c changed */ pxgstrs_finalize(SOLVEstruct->gstrs_comm); pxgstrs_init(A->ncol, m_loc, nrhs, fst_row, perm_r, perm_c, grid, LUstruct->Glu_persist, SOLVEstruct); #endif /* ------------------------------------------------------------ Compute the solution matrix X. ------------------------------------------------------------*/ if ( nrhs && *info == 0 ) { if ( !(b_work = doubleMalloc_dist(n)) ) ABORT("Malloc fails for b_work[]"); /* ------------------------------------------------------------ Scale the right-hand side if equilibration was performed. ------------------------------------------------------------*/ if ( notran ) { if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { irow = fst_row; for (i = 0; i < m_loc; ++i) { b_col[i] *= R[irow]; ++irow; } b_col += ldb; } } } else if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { irow = fst_row; for (i = 0; i < m_loc; ++i) { b_col[i] *= C[irow]; ++irow; } b_col += ldb; } } /* Save a copy of the right-hand side. 
*/ ldx = ldb; if ( !(X = doubleMalloc_dist(((size_t)ldx) * nrhs)) ) ABORT("Malloc fails for X[]"); x_col = X; b_col = B; for (j = 0; j < nrhs; ++j) { #if 0 /* Sherry */ for (i = 0; i < m_loc; ++i) x_col[i] = b_col[i]; #endif memcpy(x_col, b_col, m_loc * sizeof(double)); x_col += ldx; b_col += ldb; } /* ------------------------------------------------------------ Solve the linear system. ------------------------------------------------------------*/ if ( options->SolveInitialized == NO ) { /* First time */ dSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid, SOLVEstruct); /* Inside this routine, SolveInitialized is set to YES. For repeated call to pdgssvx(), no need to re-initialilze the Solve data & communication structures, unless a new factorization with Fact == DOFACT or SamePattern is asked for. */ } pdgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc, fst_row, ldb, nrhs, SOLVEstruct, stat, info); /* ------------------------------------------------------------ Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. ------------------------------------------------------------*/ if ( options->IterRefine ) { /* Improve the solution by iterative refinement. */ int_t *it; int_t *colind_gsmv = SOLVEstruct->A_colind_gsmv; /* This was allocated and set to NULL in dSolveInit() */ SOLVEstruct_t *SOLVEstruct1; /* Used by refinement. */ t = SuperLU_timer_(); if ( options->RefineInitialized == NO || Fact == DOFACT ) { /* All these cases need to re-initialize gsmv structure */ if ( options->RefineInitialized ) pdgsmv_finalize(SOLVEstruct->gsmv_comm); pdgsmv_init(A, SOLVEstruct->row_to_proc, grid, SOLVEstruct->gsmv_comm); /* Save a copy of the transformed local col indices in colind_gsmv[]. 
*/ if ( colind_gsmv ) SUPERLU_FREE(colind_gsmv); if ( !(it = intMalloc_dist(nnz_loc)) ) ABORT("Malloc fails for colind_gsmv[]"); colind_gsmv = SOLVEstruct->A_colind_gsmv = it; for (i = 0; i < nnz_loc; ++i) colind_gsmv[i] = colind[i]; options->RefineInitialized = YES; } else if ( Fact == SamePattern || Fact == SamePattern_SameRowPerm ) { double atemp; int_t k, jcol, p; /* Swap to beginning the part of A corresponding to the local part of X, as was done in pdgsmv_init() */ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ k = rowptr[i]; for (j = rowptr[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; p = SOLVEstruct->row_to_proc[jcol]; if ( p == iam ) { /* Local */ atemp = a[k]; a[k] = a[j]; a[j] = atemp; ++k; } } } /* Re-use the local col indices of A obtained from the previous call to pdgsmv_init() */ for (i = 0; i < nnz_loc; ++i) colind[i] = colind_gsmv[i]; } if ( nrhs == 1 ) { /* Use the existing solve structure */ SOLVEstruct1 = SOLVEstruct; } else { /* For nrhs > 1, since refinement is performed for RHS one at a time, the communication structure for pdgstrs is different than the solve with nrhs RHS. So we use SOLVEstruct1 for the refinement step. */ if ( !(SOLVEstruct1 = (SOLVEstruct_t *) SUPERLU_MALLOC(sizeof(SOLVEstruct_t))) ) ABORT("Malloc fails for SOLVEstruct1"); /* Copy the same stuff */ SOLVEstruct1->row_to_proc = SOLVEstruct->row_to_proc; SOLVEstruct1->inv_perm_c = SOLVEstruct->inv_perm_c; SOLVEstruct1->num_diag_procs = SOLVEstruct->num_diag_procs; SOLVEstruct1->diag_procs = SOLVEstruct->diag_procs; SOLVEstruct1->diag_len = SOLVEstruct->diag_len; SOLVEstruct1->gsmv_comm = SOLVEstruct->gsmv_comm; SOLVEstruct1->A_colind_gsmv = SOLVEstruct->A_colind_gsmv; /* Initialize the *gstrs_comm for 1 RHS. 
*/ if ( !(SOLVEstruct1->gstrs_comm = (pxgstrs_comm_t *) SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) ) ABORT("Malloc fails for gstrs_comm[]"); pxgstrs_init(n, m_loc, 1, fst_row, perm_r, perm_c, grid, Glu_persist, SOLVEstruct1); } pdgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid, B, ldb, X, ldx, nrhs, SOLVEstruct1, berr, stat, info); /* Deallocate the storage associated with SOLVEstruct1 */ if ( nrhs > 1 ) { pxgstrs_finalize(SOLVEstruct1->gstrs_comm); SUPERLU_FREE(SOLVEstruct1); } stat->utime[REFINE] = SuperLU_timer_() - t; } /* end if IterRefine */ /* Permute the solution matrix B <= Pc'*X. */ pdPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc, SOLVEstruct->inv_perm_c, X, ldx, B, ldb, nrhs, grid); #if ( DEBUGlevel>=2 ) printf("\n (%d) .. After pdPermute_Dense_Matrix(): b =\n", iam); for (i = 0; i < m_loc; ++i) printf("\t(%d)\t%4d\t%.10f\n", iam, i+fst_row, B[i]); #endif /* Transform the solution matrix X to a solution of the original system before equilibration. */ if ( notran ) { if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { irow = fst_row; for (i = 0; i < m_loc; ++i) { b_col[i] *= C[irow]; ++irow; } b_col += ldb; } } } else if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { irow = fst_row; for (i = 0; i < m_loc; ++i) { b_col[i] *= R[irow]; ++irow; } b_col += ldb; } } SUPERLU_FREE(b_work); SUPERLU_FREE(X); } /* end if nrhs != 0 && *info == 0 */ #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. DiagScale = %d\n", ScalePermstruct->DiagScale); #endif /* Deallocate R and/or C if it was not used. 
*/ if ( Equil && Fact != SamePattern_SameRowPerm ) { switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: SUPERLU_FREE(R); SUPERLU_FREE(C); break; case ROW: SUPERLU_FREE(C); break; case COL: SUPERLU_FREE(R); break; } } #if 0 if ( !factored && Fact != SamePattern_SameRowPerm && !parSymbFact) Destroy_CompCol_Permuted_dist(&GAC); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgssvx()"); #endif } SuperLU_DIST_5.3.0/SRC/pdgstrf.c0000644013363400111340000022307413233431301015110 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs LU factorization in parallel * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 * Modified:
 *     September 1, 1999
 *     February 7, 2001  use MPI_Isend/MPI_Irecv
 *     October 15, 2008  latency-reducing panel factorization
 *     July    12, 2011  static scheduling and arbitrary look-ahead
 *     March   13, 2013  change NTAGS to MPI_TAG_UB value
 *     September 24, 2015 replace xLAMCH by xMACH, using C99 standard.
 *     December 31, 2015 rename xMACH to xMACH_DIST.
 *     September 30, 2017 optimization for Intel Knights Landing (KNL) node .
 *
 * Sketch of the algorithm 
 *
 * ======================= 
 *    
 * The following relations hold:
 *     * A_kk = L_kk * U_kk
 *     * L_ik = A_ik * U_kk^(-1)
 *     * U_kj = L_kk^(-1) * A_kj
 *
 *              ----------------------------------
 *              |   |                            |
 *              ----|-----------------------------
 *              |   | \ U_kk|                    |
 *              |   |   \   |        U_kj        |
 *              |   |L_kk \ |         ||         |
 *              ----|-------|---------||----------
 *              |   |       |         \/         |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   | L_ik ==>       A_ij        |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              ----------------------------------
 *
 * Handle the first block of columns separately.
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity. ( pdgstrf2(0), one column at a time )
 *     * Compute block row of U
 *     * Update trailing matrix
 *
 * Loop over the remaining blocks of columns.
 *   mycol = MYCOL( iam, grid );
 *   myrow = MYROW( iam, grid );
 *   N = nsupers;
 *   For (k = 1; k < N; ++k) {
 *       krow = PROW( k, grid );
 *       kcol = PCOL( k, grid );
 *       Pkk = PNUM( krow, kcol, grid );
 *
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity.
 *       if ( mycol == kcol ) {
 *           pdgstrf2(k), one column at a time
 *       }
 *
 *     * Parallel triangular solve
 *       if ( iam == Pkk ) multicast L_k,k to this process row;
 *       if ( myrow == krow && mycol != kcol ) {
 *          Recv L_k,k from process Pkk;
 *          for (j = k+1; j < N; ++j)
 *              if ( PCOL( j, grid ) == mycol && A_k,j != 0 )
 *                 U_k,j = L_k,k \ A_k,j;
 *       }
 *
 *     * Parallel rank-k update
 *       if ( myrow == krow ) multicast U_k,k+1:N to this process column;
 *       if ( mycol == kcol ) multicast L_k+1:N,k to this process row;
 *       if ( myrow != krow ) {
 *          Pkj = PNUM( krow, mycol, grid );
 *          Recv U_k,k+1:N from process Pkj;
 *       }
 *       if ( mycol != kcol ) {
 *          Pik = PNUM( myrow, kcol, grid );
 *          Recv L_k+1:N,k from process Pik;
 *       }
 *       for (j = k+1; j < N; ++j) {
 *          for (i = k+1; i < N; ++i)
 *              if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid )
 *                   && L_i,k != 0 && U_k,j != 0 )
 *                 A_i,j = A_i,j - L_i,k * U_k,j;
 *       }
 *  }
 *
 * 
*/ #include /*#include "mkl.h"*/ #include "superlu_ddefs.h" #ifdef GPU_ACC #include "cublas_utils.h" /*#include "cublas_dgemm.h"*/ // #define NUM_CUDA_STREAMS 16 // #define NUM_CUDA_STREAMS 16 #endif /* Various defininations */ /* Name : SUPERNODE_PROFILE Purpose : For SuperNode Level profiling of various measurements such as gigaflop/sec obtained,bandwidth achieved: Overhead : Low */ // #define SUPERNODE_PROFILE /* Name : BAELINE Purpose : baseline to compare performance against Overhead : NA : this won't be used for running experiments */ // #define BASELINE /* Name : PHI_FRAMEWORK Purpose : To simulate and test algorithm used for offloading Phi Overhead : NA : this won't be used for running experiments */ #define PHI_FRAMEWORK #if 0 #define CACHELINE 64 /* bytes, Xeon Phi KNL */ #else #define CACHELINE 0 /* not worry about false sharing of different threads */ #endif //#define GEMM_PADLEN 1 #define GEMM_PADLEN 8 #define PDGSTRF2 pdgstrf2_trsm #define PDGSTRS2 pdgstrs2_omp extern void PDGSTRF2 (superlu_dist_options_t *, int_t, int_t, double, Glu_persist_t *, gridinfo_t *, LocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *); #ifdef _CRAY extern void PDGSTRS2 (int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, _fcd, _fcd, _fcd); #else extern void PDGSTRS2 (int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *); #endif #ifdef ISORT extern void isort (int_t N, int_t * ARRAY1, int_t * ARRAY2); extern void isort1 (int_t N, int_t * ARRAY); #else int superlu_sort_perm (const void *arg1, const void *arg2) { const int_t *val1 = (const int_t *) arg1; const int_t *val2 = (const int_t *) arg2; return (*val2 < *val1); } #endif /************************************************************************/ #include "dscatter.c" /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *
 * PDGSTRF performs the LU factorization in parallel.
 *
 * Arguments
 * =========
 *
 * options (input) superlu_dist_options_t*
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following field should be defined:
 *         o ReplaceTinyPivot (yes_no_t)
 *           Specifies whether to replace the tiny diagonals by
 *           sqrt(epsilon)*norm(A) during LU factorization.
 *
 * m      (input) int
 *        Number of rows in the matrix.
 *
 * n      (input) int
 *        Number of columns in the matrix.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         The following fields should be defined:
 *
 *         o Glu_persist (input) Glu_persist_t*
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *             xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (input/output) LocalLU_t*
 *           The distributed data structures to store L and U factors.
 *           See superlu_ddefs.h for the definition of 'LocalLU_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ int_t pdgstrf(superlu_dist_options_t * options, int m, int n, double anorm, LUstruct_t * LUstruct, gridinfo_t * grid, SuperLUStat_t * stat, int *info) { #ifdef _CRAY _fcd ftcs = _cptofcd ("N", strlen ("N")); _fcd ftcs1 = _cptofcd ("L", strlen ("L")); _fcd ftcs2 = _cptofcd ("N", strlen ("N")); _fcd ftcs3 = _cptofcd ("U", strlen ("U")); #endif double zero = 0.0, alpha = 1.0, beta = 0.0; int_t *xsup; int_t *lsub, *lsub1, *usub, *Usub_buf; int_t **Lsub_buf_2, **Usub_buf_2; double **Lval_buf_2, **Uval_buf_2; /* pointers to starts of bufs */ double *lusup, *lusup1, *uval, *Uval_buf; /* pointer to current buf */ int_t fnz, i, ib, ijb, ilst, it, iukp, jb, jj, klst, knsupc, lb, lib, ldv, ljb, lptr, lptr0, lptrj, luptr, luptr0, luptrj, nlb, nub, nsupc, rel, rukp, il, iu; int_t Pc, Pr; int iam, kcol, krow, yourcol, mycol, myrow, pi, pj; int j, k, lk, nsupers; /* k - current panel to work on */ int k0; /* counter of the next supernode to be factored */ int kk, kk0, kk1, kk2, jj0; /* panels in the look-ahead window */ int iukp0, rukp0, flag0, flag1; int nsupr, nbrow, segsize; int msg0, msg2; int_t **Ufstnz_br_ptr, **Lrowind_bc_ptr; double **Unzval_br_ptr, **Lnzval_bc_ptr; int_t *index; double *nzval; int_t *iuip, *ruip; /* Pointers to U index/nzval; size ceil(NSUPERS/Pr). 
*/ double *ucol; int *indirect, *indirect2; int_t *tempi; double *tempu, *tempv, *tempr; /* double *tempv2d, *tempU2d; Sherry */ int iinfo; int *ToRecv, *ToSendD, **ToSendR; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; superlu_scope_t *scp; float s_eps; double thresh; /*int full;*/ int ldt, ldu, lead_zero, ncols, ncb, nrb, p, pr, pc, nblocks; int_t *etree_supno_l, *etree_supno, *blocks, *blockr, *Ublock, *Urows, *Lblock, *Lrows, *perm_u, *sf_block, *sf_block_l, *nnodes_l, *nnodes_u, *edag_supno_l, *recvbuf, **edag_supno; float edag_supno_l_bytes; #ifdef ISORT int_t *iperm_u; #endif int *msgcnt; /* Count the size of the message xfer'd in each buffer: * 0 : transferred in Lsub_buf[] * 1 : transferred in Lval_buf[] * 2 : transferred in Usub_buf[] * 3 : transferred in Uval_buf[] */ int **msgcnts, **msgcntsU; /* counts in the look-ahead window */ int *factored; /* factored[j] == 0 : L col panel j is factorized. */ int *factoredU; /* factoredU[i] == 1 : U row panel i is factorized. 
*/ int nnodes, *sendcnts, *sdispls, *recvcnts, *rdispls, *srows, *rrows; etree_node *head, *tail, *ptr; int *num_child; int num_look_aheads, look_id, *look_ahead; int_t *perm_c_supno, *iperm_c_supno; MPI_Request *recv_req, **recv_reqs, **send_reqs, **send_reqs_u, **recv_reqs_u; MPI_Request *send_req, *U_diag_blk_send_req = NULL; MPI_Status status; void *attr_val; int flag; /* The following variables are used to pad GEMM dimensions so that each is a multiple of vector length (8 doubles for KNL) */ int gemm_m_pad = GEMM_PADLEN, gemm_k_pad = GEMM_PADLEN, gemm_n_pad = GEMM_PADLEN; int gemm_padding = 0; int iword = sizeof (int_t); int dword = sizeof (double); /* For measuring load imbalence in omp threads */ double omp_load_imblc = 0.0; double *omp_loop_time; double schur_flop_timer = 0.0; double pdgstrf2_timer = 0.0; double pdgstrs2_timer = 0.0; double lookaheadupdatetimer = 0.0; double InitTimer = 0.0; /* including compute schedule, malloc */ double tt_start, tt_end; /* #if !defined( GPU_ACC ) */ /* Counters for memory operations and timings */ double scatter_mem_op_counter = 0.0; double scatter_mem_op_timer = 0.0; double scatterL_mem_op_counter = 0.0; double scatterL_mem_op_timer = 0.0; double scatterU_mem_op_counter = 0.0; double scatterU_mem_op_timer = 0.0; /* Counters for flops/gather/scatter and timings */ double GatherLTimer = 0.0; double LookAheadRowSepMOP = 0.0; double GatherUTimer = 0.0; double GatherMOP = 0.0; double LookAheadGEMMTimer = 0.0; double LookAheadGEMMFlOp = 0.0; double LookAheadScatterTimer = 0.0; double LookAheadScatterMOP = 0.0; double RemainGEMMTimer = 0.0; double RemainGEMM_flops = 0.0; double RemainScatterTimer = 0.0; double NetSchurUpTimer = 0.0; double schur_flop_counter = 0.0; /* #endif */ #if ( PRNTlevel>= 1) /* count GEMM max dimensions */ int gemm_max_m = 0, gemm_max_n = 0, gemm_max_k = 0; #endif #if ( DEBUGlevel>=2 ) int_t num_copy = 0, num_update = 0; #endif #if ( PRNTlevel==3 ) int zero_msg = 0, total_msg = 0; #endif #if ( 
PROFlevel>=1 ) double t1, t2; float msg_vol = 0, msg_cnt = 0; double comm_wait_time = 0.0; /* Record GEMM dimensions and times */ FILE *fopen(), *fgemm; int gemm_count = 0; typedef struct { int m, n, k; double microseconds; } gemm_profile; gemm_profile *gemm_stats; #endif /* Test the input parameters. */ *info = 0; if (m < 0) *info = -2; else if (n < 0) *info = -3; if (*info) { pxerr_dist ("pdgstrf", grid, -*info); return (-1); } /* Quick return if possible. */ if (m == 0 || n == 0) return 0; double tt1 = SuperLU_timer_ (); /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW (iam, grid); mycol = MYCOL (iam, grid); nsupers = Glu_persist->supno[n - 1] + 1; xsup = Glu_persist->xsup; s_eps = smach_dist("Epsilon"); thresh = s_eps * anorm; MPI_Attr_get (MPI_COMM_WORLD, MPI_TAG_UB, &attr_val, &flag); if (!flag) { fprintf (stderr, "Could not get TAG_UB\n"); return (-1); } int tag_ub = *(int *) attr_val; #if ( PRNTlevel>=1 ) if (!iam) { printf ("MPI tag upper bound = %d\n", tag_ub); fflush(stdout); } #endif #if ( DEBUGlevel>=1 ) if (s_eps == 0.0) printf (" ***** warning s_eps = %e *****\n", s_eps); CHECK_MALLOC (iam, "Enter pdgstrf()"); #endif #if (PROFlevel >= 1 ) gemm_stats = (gemm_profile *) SUPERLU_MALLOC(nsupers * sizeof(gemm_profile)); if (iam == 0) fgemm = fopen("dgemm_mnk.dat", "w"); int *prof_sendR = intCalloc_dist(nsupers); #endif stat->ops[FACT] = 0.0; stat->current_buffer = 0.0; stat->peak_buffer = 0.0; stat->gpu_buffer = 0.0; /* make sure the range of look-ahead window [0, MAX_LOOKAHEADS-1] */ num_look_aheads = SUPERLU_MAX(0, SUPERLU_MIN(options->num_lookaheads, MAX_LOOKAHEADS - 1)); if (Pr * Pc > 1) { if (!(U_diag_blk_send_req = (MPI_Request *) SUPERLU_MALLOC (Pr * sizeof (MPI_Request)))) ABORT ("Malloc fails for U_diag_blk_send_req[]."); /* flag no outstanding Isend */ U_diag_blk_send_req[myrow] = MPI_REQUEST_NULL; /* used 0 before */ /* allocating buffers for look-ahead */ i = Llu->bufmax[0]; if (i != 0) { if ( 
!(Llu->Lsub_buf_2[0] = intMalloc_dist ((num_look_aheads + 1) * ((size_t) i))) ) ABORT ("Malloc fails for Lsub_buf."); tempi = Llu->Lsub_buf_2[0]; for (jj = 0; jj < num_look_aheads; jj++) Llu->Lsub_buf_2[jj+1] = tempi + i*(jj+1); /* vectorize */ //Llu->Lsub_buf_2[jj + 1] = Llu->Lsub_buf_2[jj] + i; } i = Llu->bufmax[1]; if (i != 0) { if (!(Llu->Lval_buf_2[0] = doubleMalloc_dist ((num_look_aheads + 1) * ((size_t) i)))) ABORT ("Malloc fails for Lval_buf[]."); tempr = Llu->Lval_buf_2[0]; for (jj = 0; jj < num_look_aheads; jj++) Llu->Lval_buf_2[jj+1] = tempr + i*(jj+1); /* vectorize */ //Llu->Lval_buf_2[jj + 1] = Llu->Lval_buf_2[jj] + i; } i = Llu->bufmax[2]; if (i != 0) { if (!(Llu->Usub_buf_2[0] = intMalloc_dist ((num_look_aheads + 1) * i))) ABORT ("Malloc fails for Usub_buf_2[]."); tempi = Llu->Usub_buf_2[0]; for (jj = 0; jj < num_look_aheads; jj++) Llu->Usub_buf_2[jj+1] = tempi + i*(jj+1); /* vectorize */ //Llu->Usub_buf_2[jj + 1] = Llu->Usub_buf_2[jj] + i; } i = Llu->bufmax[3]; if (i != 0) { if (!(Llu->Uval_buf_2[0] = doubleMalloc_dist ((num_look_aheads + 1) * i))) ABORT ("Malloc fails for Uval_buf_2[]."); tempr = Llu->Uval_buf_2[0]; for (jj = 0; jj < num_look_aheads; jj++) Llu->Uval_buf_2[jj+1] = tempr + i*(jj+1); /* vectorize */ //Llu->Uval_buf_2[jj + 1] = Llu->Uval_buf_2[jj] + i; } } log_memory( (Llu->bufmax[0] + Llu->bufmax[2]) * (num_look_aheads + 1) * iword + (Llu->bufmax[1] + Llu->bufmax[3]) * (num_look_aheads + 1) * dword, stat ); /* creating pointers to the look-ahead buffers */ if (! (Lsub_buf_2 = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (int_t *)))) ABORT ("Malloc fails for Lsub_buf_2[]."); if (! (Lval_buf_2 = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (double *)))) ABORT ("Malloc fails for Lval_buf_2[]."); if (! (Usub_buf_2 = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (int_t *)))) ABORT ("Malloc fails for Uval_buf_2[]."); if (! 
(Uval_buf_2 = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (double *)))) ABORT ("Malloc fails for buf_2[]."); for (i = 0; i <= num_look_aheads; i++) { Lval_buf_2[i] = Llu->Lval_buf_2[i]; Lsub_buf_2[i] = Llu->Lsub_buf_2[i]; Uval_buf_2[i] = Llu->Uval_buf_2[i]; Usub_buf_2[i] = Llu->Usub_buf_2[i]; } if (!(msgcnts = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (int *)))) ABORT ("Malloc fails for msgcnts[]."); if (!(msgcntsU = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (int *)))) ABORT ("Malloc fails for msgcntsU[]."); for (i = 0; i <= num_look_aheads; i++) { if (!(msgcnts[i] = SUPERLU_MALLOC (4 * sizeof (int)))) ABORT ("Malloc fails for msgcnts[]."); if (!(msgcntsU[i] = SUPERLU_MALLOC (4 * sizeof (int)))) ABORT ("Malloc fails for msgcntsU[]."); } if (! (recv_reqs_u = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (MPI_Request *)))) ABORT ("Malloc fails for recv_reqs_u[]."); if (! (send_reqs_u = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (MPI_Request *)))) ABORT ("Malloc fails for send_reqs_u[]."); if (! (send_reqs = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (MPI_Request *)))) ABORT ("Malloc fails for send_reqs_u[]."); if (! 
(recv_reqs = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (MPI_Request *)))) ABORT ("Malloc fails for recv_reqs[]."); for (i = 0; i <= num_look_aheads; i++) { if (!(recv_reqs_u[i] = (MPI_Request *) SUPERLU_MALLOC (2 * sizeof (MPI_Request)))) ABORT ("Malloc fails for recv_req_u[i]."); if (!(send_reqs_u[i] = (MPI_Request *) SUPERLU_MALLOC (2 * Pr * sizeof (MPI_Request)))) ABORT ("Malloc fails for send_req_u[i]."); if (!(send_reqs[i] = (MPI_Request *) SUPERLU_MALLOC (2 * Pc * sizeof (MPI_Request)))) ABORT ("Malloc fails for send_reqs[i]."); if (!(recv_reqs[i] = (MPI_Request *) SUPERLU_MALLOC (4 * sizeof (MPI_Request)))) ABORT ("Malloc fails for recv_req[]."); send_reqs[i][0] = send_reqs[i][1] = MPI_REQUEST_NULL; recv_reqs[i][0] = recv_reqs[i][1] = MPI_REQUEST_NULL; } if (!(factored = SUPERLU_MALLOC (nsupers * sizeof (int_t)))) ABORT ("Malloc fails for factored[]."); if (!(factoredU = SUPERLU_MALLOC (nsupers * sizeof (int_t)))) ABORT ("Malloc fails for factoredU[]."); for (i = 0; i < nsupers; i++) factored[i] = factoredU[i] = -1; log_memory(2 * nsupers * iword, stat); int num_threads = 1; #ifdef _OPENMP #pragma omp parallel default(shared) #pragma omp master { //if (omp_get_thread_num () == 0) num_threads = omp_get_num_threads (); } #endif #if 0 omp_loop_time = (double *) _mm_malloc (sizeof (double) * num_threads,64); #else omp_loop_time = (double *) doubleMalloc_dist(num_threads); #endif #if ( PRNTlevel>=1 ) if(!iam) { printf(".. 
Starting with %d OpenMP threads \n", num_threads ); fflush(stdout); } #endif nblocks = 0; ncb = nsupers / Pc; /* number of column blocks, horizontal */ nrb = nsupers / Pr; /* number of row blocks, vertical */ /* in order to have dynamic scheduling */ int *full_u_cols; int *blk_ldu; #if 0 full_u_cols = (int_t *) _mm_malloc (sizeof (int_t) * ncb,64); blk_ldu = (int_t *) _mm_malloc (sizeof (int_t) * ncb,64); #else full_u_cols = SUPERLU_MALLOC(ncb * sizeof(int)); blk_ldu = SUPERLU_MALLOC(ncb * sizeof(int)); #endif log_memory(2 * ncb * iword, stat); #if 0 /* Sherry: not used? */ /* This bunch is used for static scheduling */ pair *full_col_count = (pair *) _mm_malloc (sizeof (pair) * ncb,64); int_t *count_cols, *sum_cols, *partition; count_cols = (int_t *) _mm_malloc (sizeof (int_t) * num_threads,64); sum_cols = (int_t *) _mm_malloc (sizeof (int_t) * num_threads,64); partition = (int_t *) _mm_malloc (sizeof (int_t) * num_threads * ncb,64); int_t ldp = ncb; #endif /* ################################################################## * Compute a good static schedule based on the factorization task graph. * ################################################################## */ perm_c_supno = SUPERLU_MALLOC (2 * nsupers * sizeof (int_t)); iperm_c_supno = perm_c_supno + nsupers; static_schedule(options, m, n, LUstruct, grid, stat, perm_c_supno, iperm_c_supno, info); #if ( DEBUGlevel >= 2 ) PrintInt10("schedule:perm_c_supno", nsupers, perm_c_supno); /* Turn off static schedule */ printf("[%d] .. 
Turn off static schedule for debugging ..\n", iam); for (i = 0; i < nsupers; ++i) perm_c_supno[i] = iperm_c_supno[i] = i; #endif /* ################################################################## */ /* constructing look-ahead table to indicate the last dependency */ int *look_ahead_l; /* Sherry: add comment on look_ahead_l[] */ stat->num_look_aheads = num_look_aheads; look_ahead_l = SUPERLU_MALLOC (nsupers * sizeof (int)); look_ahead = SUPERLU_MALLOC (nsupers * sizeof (int)); for (lb = 0; lb < nsupers; lb++) look_ahead_l[lb] = -1; /* vectorized */ log_memory(3 * nsupers * iword, stat); /* Sherry: omp parallel? not worth doing, due to concurrent write to look_ahead_l[jb] */ for (lb = 0; lb < nrb; ++lb) { /* go through U-factor */ ib = lb * Pr + myrow; index = Llu->Ufstnz_br_ptr[lb]; if (index) { /* Not an empty row */ k = BR_HEADER; for (j = 0; j < index[0]; ++j) { jb = index[k]; /* global block number */ if (jb != ib) look_ahead_l[jb] = SUPERLU_MAX (iperm_c_supno[ib], look_ahead_l[jb]); k += UB_DESCRIPTOR + SuperSize (index[k]); } } } if (myrow < nsupers % grid->nprow) { /* leftover block rows */ ib = nrb * Pr + myrow; index = Llu->Ufstnz_br_ptr[nrb]; if (index) { /* Not an empty row */ k = BR_HEADER; for (j = 0; j < index[0]; ++j) { jb = index[k]; if (jb != ib) look_ahead_l[jb] = SUPERLU_MAX (iperm_c_supno[ib], look_ahead_l[jb]); k += UB_DESCRIPTOR + SuperSize (index[k]); } } } if (options->SymPattern == NO) { /* Sherry: omp parallel? 
not worth doing, due to concurrent write to look_ahead_l[jb] */ for (lb = 0; lb < ncb; lb++) { /* go through L-factor */ ib = lb * Pc + mycol; index = Llu->Lrowind_bc_ptr[lb]; if (index) { k = BC_HEADER; for (j = 0; j < index[0]; j++) { jb = index[k]; if (jb != ib) look_ahead_l[jb] = SUPERLU_MAX (iperm_c_supno[ib], look_ahead_l[jb]); k += LB_DESCRIPTOR + index[k + 1]; } } } if (mycol < nsupers % grid->npcol) { /* leftover block columns */ ib = ncb * Pc + mycol; index = Llu->Lrowind_bc_ptr[ncb]; if (index) { k = BC_HEADER; for (j = 0; j < index[0]; j++) { jb = index[k]; if (jb != ib) look_ahead_l[jb] = SUPERLU_MAX (iperm_c_supno[ib], look_ahead_l[jb]); k += LB_DESCRIPTOR + index[k + 1]; } } } } MPI_Allreduce (look_ahead_l, look_ahead, nsupers, MPI_INT, MPI_MAX, grid->comm); SUPERLU_FREE (look_ahead_l); #ifdef ISORT iperm_u = SUPERLU_MALLOC (nsupers * sizeof (int_t)); perm_u = SUPERLU_MALLOC (nsupers * sizeof (int_t)); #else perm_u = SUPERLU_MALLOC (2 * nsupers * sizeof (int_t)); #endif log_memory(nsupers * iword, stat); k = sp_ienv_dist (3); /* max supernode size */ #if 0 if ( !(Llu->ujrow = doubleMalloc_dist(k*(k+1)/2)) ) ABORT("Malloc fails for ujrow[]."); #else /* Instead of half storage, we'll do full storage */ if (!(Llu->ujrow = doubleCalloc_dist (k * k))) ABORT ("Malloc fails for ujrow[]."); #endif log_memory(k * k * iword, stat); #if ( PRNTlevel>=1 ) if (!iam) { printf (".. thresh = s_eps %e * anorm %e = %e\n", s_eps, anorm, thresh); printf (".. 
Buffer size: Lsub %ld\tLval %ld\tUsub %ld\tUval %ld\tLDA %ld\n", (long int) Llu->bufmax[0], (long int) Llu->bufmax[1], (long int) Llu->bufmax[2], (long int) Llu->bufmax[3], (long int) Llu->bufmax[4]); fflush(stdout); } #endif Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; ToRecv = Llu->ToRecv; ToSendD = Llu->ToSendD; ToSendR = Llu->ToSendR; ldt = sp_ienv_dist (3); /* Size of maximum supernode */ k = CEILING (nsupers, Pr); /* Number of local block rows */ /* Following code is for finding maximum row dimension of all L panels */ int local_max_row_size = 0; int max_row_size; #if 0 #if defined _OPENMP // Sherry: parallel reduction -- seems slower? #pragma omp parallel for reduction(max :local_max_row_size) private(lk,lsub) #endif #endif for (int i = mycol; i < nsupers; i += Pc) { /* grab my local columns */ //int tpc = PCOL (i, grid); lk = LBj (i, grid); lsub = Lrowind_bc_ptr[lk]; if (lsub != NULL) { if (lsub[1] > local_max_row_size) local_max_row_size = lsub[1]; } } /* Max row size is global reduction within a row */ MPI_Allreduce (&local_max_row_size, &max_row_size, 1, MPI_INT, MPI_MAX, (grid->rscp.comm)); /* Buffer size is max of look-ahead window */ /* int_t buffer_size = SUPERLU_MAX (max_row_size * num_threads * ldt, get_max_buffer_size ()); */ #ifdef GPU_ACC int cublas_nb = get_cublas_nb(); int nstreams = get_num_cuda_streams (); int buffer_size = SUPERLU_MAX(max_row_size*nstreams*cublas_nb,get_max_buffer_size()); /* array holding last column blk for each partition, used in SchCompUdt--CUDA.c */ #if 0 int *stream_end_col = (int_t *) _mm_malloc (sizeof (int_t) * nstreams,64); #else int *stream_end_col = SUPERLU_MALLOC( nstreams * sizeof(int) ); #endif #else /* not to use GPU */ int Threads_per_process = get_thread_per_process(); int buffer_size = SUPERLU_MAX(max_row_size*Threads_per_process*ldt,get_max_buffer_size()); #endif /* end ifdef GPU_ACC */ #if 0 /* symmetric 
assumption -- using L's supernode to estimate. */ /* Note that in following expression 8 can be anything as long as its not too big */ int bigu_size = 8 * sp_ienv_dist (3) * (max_row_size); #else int_t bigu_size = estimate_bigu_size( nsupers, ldt, Ufstnz_br_ptr, Glu_persist, grid, perm_u ); #endif /* +16 to avoid cache line false sharing */ int_t bigv_size = SUPERLU_MAX(max_row_size * (bigu_size / ldt), (ldt*ldt + CACHELINE / dword) * num_threads); /* bigU and bigV are either on CPU or on GPU, not both. */ double* bigU; /* for storing entire U(k,:) panel, prepare for GEMM. bigU has the same size either on CPU or on CPU. */ double* bigV; /* for storing GEMM output matrix, i.e. update matrix. bigV is large to hold the aggregate GEMM output.*/ #if ( PRNTlevel>=1 ) if(!iam) { printf("max_nrows in L panel %d\n", max_row_size); printf("\t.. GEMM buffer size: max_nrows X max_ncols = %d x %d\n", max_row_size, (bigu_size / ldt)); printf(".. BIG U size %d\t BIG V size %d\n", bigu_size, bigv_size); fflush(stdout); } #endif #ifdef GPU_ACC if ( checkCuda(cudaHostAlloc((void**)&bigU, bigu_size * sizeof(double), cudaHostAllocDefault)) ) ABORT("Malloc fails for dgemm buffer U "); bigv_size = buffer_size; #if ( PRNTlevel>=1 ) if (!iam) printf("[%d] .. 
BIG V bigv_size %d, using buffer_size %d (on GPU)\n", iam, bigv_size, buffer_size); #endif if ( checkCuda(cudaHostAlloc((void**)&bigV, bigv_size * sizeof(double) ,cudaHostAllocDefault)) ) ABORT("Malloc fails for dgemm buffer V"); DisplayHeader(); #if ( PRNTlevel>=1 ) printf(" Starting with %d Cuda Streams \n",nstreams ); #endif cublasHandle_t *handle; handle = (cublasHandle_t *) SUPERLU_MALLOC(sizeof(cublasHandle_t)*nstreams); for(int i = 0; i < nstreams; i++) handle[i] = create_handle(); // creating streams cudaStream_t *streams; streams = (cudaStream_t *) SUPERLU_MALLOC(sizeof(cudaStream_t)*nstreams); for (int i = 0; i < nstreams; ++i) checkCuda( cudaStreamCreate(&streams[i]) ); // allocating data in device double *dA, *dB, *dC; cudaError_t cudaStat; #if 0 // cudaStat = cudaMalloc( (void**)&dA, m*k*sizeof(double)); // HOw much should be the size of dA? // for time being just making it // cudaStat = cudaMalloc( (void**)&dA, ((max_row_size*sp_ienv_dist(3)))* sizeof(double)); #endif cudaStat = cudaMalloc( (void**)&dA, max_row_size*sp_ienv_dist(3)* sizeof(double)); if (cudaStat!= cudaSuccess) { fprintf(stderr, "!!!! Error in allocating A in the device %ld \n",m*k*sizeof(double) ); return 1; } // size of B should be max_supernode_size*buffer cudaStat = cudaMalloc((void**)&dB, bigu_size * sizeof(double)); if (cudaStat!= cudaSuccess) { fprintf(stderr, "!!!! Error in allocating B in the device %ld \n",n*k*sizeof(double)); return 1; } cudaStat = cudaMalloc((void**)&dC, buffer_size* sizeof(double) ); if (cudaStat!= cudaSuccess) { fprintf(stderr, "!!!! 
Error in allocating C in the device \n" ); return 1; } stat->gpu_buffer += ( max_row_size * sp_ienv_dist(3) + bigu_size + buffer_size ) * dword; #else /* not CUDA */ // for GEMM padding 0 j = bigu_size / ldt; bigu_size += (gemm_k_pad * (j + ldt + gemm_n_pad)); bigv_size += (gemm_m_pad * (j + max_row_size + gemm_n_pad)); #ifdef __INTEL_COMPILER bigU = _mm_malloc(bigu_size * sizeof(double), 1<<12); // align at 4K page bigV = _mm_malloc(bigv_size * sizeof(double), 1<<12); #else if ( !(bigU = doubleMalloc_dist(bigu_size)) ) ABORT ("Malloc fails for dgemm U buffer"); //Maximum size of bigU= sqrt(buffsize) ? // int bigv_size = 8 * ldt * ldt * num_threads; if ( !(bigV = doubleMalloc_dist(bigv_size)) ) ABORT ("Malloc failed for dgemm V buffer"); #endif #endif /* end ifdef GPU_ACC */ log_memory((bigv_size + bigu_size) * dword, stat); // mlock(bigU,(bigu_size) * sizeof (double)); #if ( PRNTlevel>=1 ) if(!iam) { printf (" Max row size is %d \n", max_row_size); printf (" Threads per process %d \n", num_threads); fflush(stdout); } #endif #if 0 /* Sherry */ if (!(tempv2d = doubleCalloc_dist (2 * ((size_t) ldt) * ldt))) ABORT ("Calloc fails for tempv2d[]."); tempU2d = tempv2d + ldt * ldt; #endif /* Sherry: (ldt + 16), avoid cache line false sharing. 
KNL cacheline size = 64 bytes = 16 int */ iinfo = ldt + CACHELINE / sizeof(int); if (!(indirect = SUPERLU_MALLOC (iinfo * num_threads * sizeof(int)))) ABORT ("Malloc fails for indirect[]."); if (!(indirect2 = SUPERLU_MALLOC (iinfo * num_threads * sizeof(int)))) ABORT ("Malloc fails for indirect[]."); if (!(iuip = intMalloc_dist (k))) ABORT ("Malloc fails for iuip[]."); if (!(ruip = intMalloc_dist (k))) ABORT ("Malloc fails for ruip[]."); log_memory(2 * ldt*ldt * dword + 2 * iinfo * num_threads * iword + 2 * k * iword, stat); int_t *lookAheadFullRow,*lookAheadStRow,*lookAhead_lptr,*lookAhead_ib, *RemainFullRow,*RemainStRow,*Remain_lptr,*Remain_ib; lookAheadFullRow = intMalloc_dist( (num_look_aheads+1) ); lookAheadStRow = intMalloc_dist( (num_look_aheads+1) ); lookAhead_lptr = intMalloc_dist( (num_look_aheads+1) ); lookAhead_ib = intMalloc_dist( (num_look_aheads+1) ); int_t mrb= (nsupers+Pr-1) / Pr; int_t mcb= (nsupers+Pc-1) / Pc; RemainFullRow = intMalloc_dist(mrb); RemainStRow = intMalloc_dist(mrb); #if 0 Remain_lptr = (int *) _mm_malloc(sizeof(int)*mrb,1); #else Remain_lptr = intMalloc_dist(mrb); #endif // mlock(Remain_lptr, sizeof(int)*mrb ); Remain_ib = intMalloc_dist(mrb); Remain_info_t *Remain_info; #if 0 Remain_info = (Remain_info_t *) _mm_malloc(mrb*sizeof(Remain_info_t),64); #else Remain_info = (Remain_info_t *) SUPERLU_MALLOC(mrb*sizeof(Remain_info_t)); #endif double *lookAhead_L_buff, *Remain_L_buff; /* Stores entire L-panel */ Ublock_info_t *Ublock_info; ldt = sp_ienv_dist (3); /* max supernode size */ /* The following is quite loose */ lookAhead_L_buff = doubleMalloc_dist(ldt*ldt* (num_look_aheads+1) ); #if 0 Remain_L_buff = (double *) _mm_malloc( sizeof(double)*(Llu->bufmax[1]),64); Ublock_info = (Ublock_info_t *) _mm_malloc(mcb*sizeof(Ublock_info_t),64); int * Ublock_info_iukp = (int *) _mm_malloc(mcb*sizeof(int),64); int * Ublock_info_rukp = (int *) _mm_malloc(mcb*sizeof(int),64); int * Ublock_info_jb = (int *) _mm_malloc(mcb*sizeof(int),64); #else j 
= gemm_m_pad * (ldt + max_row_size + gemm_k_pad); Remain_L_buff = doubleMalloc_dist(Llu->bufmax[1] + j); /* This is loose */ Ublock_info = (Ublock_info_t *) SUPERLU_MALLOC(mcb*sizeof(Ublock_info_t)); int *Ublock_info_iukp = (int *) SUPERLU_MALLOC(mcb*sizeof(int)); int *Ublock_info_rukp = (int *) SUPERLU_MALLOC(mcb*sizeof(int)); int *Ublock_info_jb = (int *) SUPERLU_MALLOC(mcb*sizeof(int)); #endif long long alloc_mem = 4 * mrb * iword + mrb * sizeof(Remain_info_t) + ldt * ldt * (num_look_aheads+1) * dword + Llu->bufmax[1] * dword ; log_memory(alloc_mem, stat); InitTimer = SuperLU_timer_() - tt1; double pxgstrfTimer = SuperLU_timer_(); /* ################################################################## ** Handle first block column separately to start the pipeline. ** ################################################################## */ look_id = 0; msgcnt = msgcnts[0]; /* Lsub[0] to be transferred */ send_req = send_reqs[0]; recv_req = recv_reqs[0]; k0 = 0; k = perm_c_supno[0]; kcol = PCOL (k, grid); krow = PROW (k, grid); if (mycol == kcol) { double ttt1 = SuperLU_timer_(); /* panel factorization */ PDGSTRF2 (options, k0, k, thresh, Glu_persist, grid, Llu, U_diag_blk_send_req, tag_ub, stat, info); pdgstrf2_timer += SuperLU_timer_()-ttt1; scp = &grid->rscp; /* The scope of process row. */ /* Multicasts numeric values of L(:,0) to process rows. */ lk = LBj (k, grid); /* Local block number. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if (lsub) { /* number of entries in Lsub_buf[] to be transferred */ msgcnt[0] = lsub[1] + BC_HEADER + lsub[0] * LB_DESCRIPTOR; /* number of entries in Lval_buf[] to be transferred */ msgcnt[1] = lsub[1] * SuperSize (k); } else { msgcnt[0] = msgcnt[1] = 0; } for (pj = 0; pj < Pc; ++pj) { if (ToSendR[lk][pj] != EMPTY) { #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Isend (lsub, msgcnt[0], mpi_int_t, pj, SLU_MPI_TAG (0, 0) /* 0 */, scp->comm, &send_req[pj]); MPI_Isend (lusup, msgcnt[1], MPI_DOUBLE, pj, SLU_MPI_TAG (1, 0) /* 1 */, scp->comm, &send_req[pj + Pc]); #if ( DEBUGlevel>=2 ) printf ("[%d] first block cloumn Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, 0, msgcnt[0], msgcnt[1], pj); #endif #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; ++prof_sendR[lk]; msg_cnt += 2; msg_vol += msgcnt[0] * iword + msgcnt[1] * dword; #endif } /* end if */ } /* end for pj ... */ } else { /* Post immediate receives. */ if (ToRecv[k] >= 1) { /* Recv block column L(:,0). */ scp = &grid->rscp; /* The scope of process row. */ #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Lsub_buf_2[0], Llu->bufmax[0], mpi_int_t, kcol, SLU_MPI_TAG (0, 0) /* 0 */ , scp->comm, &recv_req[0]); MPI_Irecv (Lval_buf_2[0], Llu->bufmax[1], MPI_DOUBLE, kcol, SLU_MPI_TAG (1, 0) /* 1 */ , scp->comm, &recv_req[1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif } } /* end if mycol == 0 */ factored[k] = 0; /* flag column k as factored. */ /* post receive of first U-row */ if (myrow != krow) { if (ToRecv[k] == 2) { /* Recv block row U(k,:). */ scp = &grid->cscp; /* The scope of process column. 
*/ Usub_buf = Llu->Usub_buf_2[0]; Uval_buf = Llu->Uval_buf_2[0]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Usub_buf, Llu->bufmax[2], mpi_int_t, krow, SLU_MPI_TAG (2, 0) /* 2%tag_ub */ , scp->comm, &recv_reqs_u[0][0]); MPI_Irecv (Uval_buf, Llu->bufmax[3], MPI_DOUBLE, krow, SLU_MPI_TAG (3, 0) /* 3%tag_ub */ , scp->comm, &recv_reqs_u[0][1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; #endif } } /* ################################################################## **** MAIN LOOP **** ################################################################## */ for (k0 = 0; k0 < nsupers; ++k0) { k = perm_c_supno[k0]; /* ============================================ * * ======= look-ahead the new L columns ======= * * ============================================ */ /* tt1 = SuperLU_timer_(); */ if (k0 == 0) { /* look-ahead all the columns in the window */ kk1 = k0 + 1; kk2 = SUPERLU_MIN (k0 + num_look_aheads, nsupers - 1); } else { /* look-ahead one new column after the current window */ kk1 = k0 + num_look_aheads; kk2 = SUPERLU_MIN (kk1, nsupers - 1); } for (kk0 = kk1; kk0 <= kk2; kk0++) { /* loop through look-ahead window in L */ kk = perm_c_supno[kk0]; /* use the ordering from static schedule */ look_id = kk0 % (1 + num_look_aheads); /* which column in window */ if (look_ahead[kk] < k0) { /* does not depend on current column k */ kcol = PCOL (kk, grid); if (mycol == kcol) { /* I own this panel */ /* Panel factorization -- Factor diagonal and subdiagonal L blocks and test for exact singularity. */ factored[kk] = 0; /* flag column kk as factored */ double ttt1 = SuperLU_timer_(); PDGSTRF2 (options, kk0, kk, thresh, Glu_persist, grid, Llu, U_diag_blk_send_req, tag_ub, stat, info); pdgstrf2_timer += SuperLU_timer_() - ttt1; /* Multicasts numeric values of L(:,kk) to process rows. 
*/ /* ttt1 = SuperLU_timer_(); */ msgcnt = msgcnts[look_id]; /* point to the proper count array */ send_req = send_reqs[look_id]; lk = LBj (kk, grid); /* Local block number in L. */ lsub1 = Lrowind_bc_ptr[lk]; if (lsub1) { msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0] * LB_DESCRIPTOR; /* size of metadata */ msgcnt[1] = lsub1[1] * SuperSize (kk); /* Lval_buf[] size */ } else { msgcnt[0] = 0; msgcnt[1] = 0; } scp = &grid->rscp; /* The scope of process row. */ for (pj = 0; pj < Pc; ++pj) { if (ToSendR[lk][pj] != EMPTY) { lusup1 = Lnzval_bc_ptr[lk]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Isend (lsub1, msgcnt[0], mpi_int_t, pj, SLU_MPI_TAG (0, kk0), /* (4*kk0)%tag_ub */ scp->comm, &send_req[pj]); MPI_Isend (lusup1, msgcnt[1], MPI_DOUBLE, pj, SLU_MPI_TAG (1, kk0), /* (4*kk0+1)%tag_ub */ scp->comm, &send_req[pj + Pc]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; ++prof_sendR[lk]; #endif #if ( DEBUGlevel>=2 ) printf ("[%d] -1- Send L(:,%4d): #lsub1 %4d, #lusup1 %4d right to Pj %2d\n", iam, kk, msgcnt[0], msgcnt[1], pj); #endif } } /* stat->time9 += SuperLU_timer_() - ttt1; */ } else { /* Post Recv of block column L(:,kk). */ /* double ttt1 = SuperLU_timer_(); */ if (ToRecv[kk] >= 1) { scp = &grid->rscp; /* The scope of process row. */ recv_req = recv_reqs[look_id]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Lsub_buf_2[look_id], Llu->bufmax[0], mpi_int_t, kcol, SLU_MPI_TAG (0, kk0), /* (4*kk0)%tag_ub */ scp->comm, &recv_req[0]); MPI_Irecv (Lval_buf_2[look_id], Llu->bufmax[1], MPI_DOUBLE, kcol, SLU_MPI_TAG (1, kk0), /* (4*kk0+1)%tag_ub */ scp->comm, &recv_req[1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif } /* stat->time10 += SuperLU_timer_() - ttt1; */ } /* end if mycol == Pc(kk) */ } /* end if look-ahead in L panels */ /* Pre-post irecv for U-row look-ahead */ krow = PROW (kk, grid); if (myrow != krow) { if (ToRecv[kk] == 2) { /* post iRecv block row U(kk,:). 
*/ scp = &grid->cscp; /* The scope of process column. */ Usub_buf = Llu->Usub_buf_2[look_id]; Uval_buf = Llu->Uval_buf_2[look_id]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Usub_buf, Llu->bufmax[2], mpi_int_t, krow, SLU_MPI_TAG (2, kk0) /* (4*kk0+2)%tag_ub */ , scp->comm, &recv_reqs_u[look_id][0]); MPI_Irecv (Uval_buf, Llu->bufmax[3], MPI_DOUBLE, krow, SLU_MPI_TAG (3, kk0) /* (4*kk0+3)%tag_ub */ , scp->comm, &recv_reqs_u[look_id][1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; #endif } } } /* end for each column in look-ahead window for L panels */ /* stat->time4 += SuperLU_timer_()-tt1; */ /* ================================= * * ==== look-ahead the U rows === * * ================================= */ kk1 = k0; kk2 = SUPERLU_MIN (k0 + num_look_aheads, nsupers - 1); for (kk0 = kk1; kk0 < kk2; kk0++) { kk = perm_c_supno[kk0]; /* order determined from static schedule */ if (factoredU[kk0] != 1 && look_ahead[kk] < k0) { /* does not depend on current column k */ kcol = PCOL (kk, grid); krow = PROW (kk, grid); lk = LBj (kk, grid); /* Local block number across row. NOT USED?? 
-- Sherry */ look_id = kk0 % (1 + num_look_aheads); msgcnt = msgcntsU[look_id]; recv_req = recv_reqs[look_id]; /* ================================================= * * Check if diagonal block has been received * * for panel factorization of U in look-ahead window * * ================================================= */ if (mycol == kcol) { /* I own this column panel, no need to receive L */ flag0 = flag1 = 1; msgcnt[0] = msgcnt[1] = -1; /* No need to transfer Lsub, nor Lval */ } else { /* Check to receive L(:,kk) from the left */ flag0 = flag1 = 0; if ( ToRecv[kk] >= 1 ) { #if ( PROFlevel>=1 ) TIC (t1); #endif if ( recv_req[0] != MPI_REQUEST_NULL ) { MPI_Test (&recv_req[0], &flag0, &status); if ( flag0 ) { MPI_Get_count (&status, mpi_int_t, &msgcnt[0]); recv_req[0] = MPI_REQUEST_NULL; } } else flag0 = 1; if ( recv_req[1] != MPI_REQUEST_NULL ) { MPI_Test (&recv_req[1], &flag1, &status); if ( flag1 ) { MPI_Get_count (&status, mpi_int_t, &msgcnt[1]); recv_req[1] = MPI_REQUEST_NULL; } } else flag1 = 1; #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif } else { msgcnt[0] = 0; } } if (flag0 && flag1) { /* L(:,kk) is ready */ /* tt1 = SuperLU_timer_(); */ scp = &grid->cscp; /* The scope of process column. */ if (myrow == krow) { factoredU[kk0] = 1; /* Parallel triangular solve across process row *krow* -- U(k,j) = L(k,k) \ A(k,j). */ double ttt2 = SuperLU_timer_(); #ifdef _OPENMP /* #pragma omp parallel */ /* Sherry -- parallel done inside pdgstrs2 */ #endif { PDGSTRS2 (kk0, kk, Glu_persist, grid, Llu, stat); } pdgstrs2_timer += SuperLU_timer_()-ttt2; /* stat->time8 += SuperLU_timer_()-ttt2; */ /* Multicasts U(kk,:) to process columns. 
*/ lk = LBi (kk, grid); usub = Ufstnz_br_ptr[lk]; uval = Unzval_br_ptr[lk]; if (usub) { msgcnt[2] = usub[2]; /* metadata size */ msgcnt[3] = usub[1]; /* Uval[] size */ } else { msgcnt[2] = msgcnt[3] = 0; } if (ToSendD[lk] == YES) { for (pi = 0; pi < Pr; ++pi) { if (pi != myrow) { #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Isend (usub, msgcnt[2], mpi_int_t, pi, SLU_MPI_TAG (2, kk0), /* (4*kk0+2)%tag_ub */ scp->comm, &send_reqs_u[look_id][pi]); MPI_Isend (uval, msgcnt[3], MPI_DOUBLE, pi, SLU_MPI_TAG (3, kk0), /* (4*kk0+3)%tag_ub */ scp->comm, &send_reqs_u[look_id][pi + Pr]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[2] * iword + msgcnt[3] * dword; #endif #if ( DEBUGlevel>=2 ) printf ("[%d] Send U(%4d,:) to Pr %2d\n", iam, k, pi); #endif } /* if pi ... */ } /* for pi ... */ } /* if ToSendD ... */ /* stat->time2 += SuperLU_timer_()-tt1; */ } /* end if myrow == krow */ } /* end if flag0 & flag1 ... */ } /* end if factoredU[] ... */ } /* end for kk0 ... */ /* ============================================== * * == start processing the current row of U(k,:) * * ============================================== */ knsupc = SuperSize (k); krow = PROW (k, grid); kcol = PCOL (k, grid); /* tt1 = SuperLU_timer_(); */ look_id = k0 % (1 + num_look_aheads); recv_req = recv_reqs[look_id]; send_req = send_reqs[look_id]; msgcnt = msgcnts[look_id]; Usub_buf = Llu->Usub_buf_2[look_id]; Uval_buf = Llu->Uval_buf_2[look_id]; if (mycol == kcol) { lk = LBj (k, grid); /* Local block number in L */ #if ( PROFlevel>=1 ) TIC(t1); #endif for (pj = 0; pj < Pc; ++pj) { /* Wait for Isend to complete before using lsub/lusup buffer. */ if (ToSendR[lk][pj] != EMPTY) { MPI_Wait (&send_req[pj], &status); MPI_Wait (&send_req[pj + Pc], &status); } } #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; } else { if (ToRecv[k] >= 1) { /* Recv block column L(:,k). 
*/ scp = &grid->rscp; /* The scope of process row. */ /* ============================================= * * Waiting for L(:,kk) for outer-product uptate * * if iam in U(kk,:), then the diagonal block * * did not reach in time for panel factorization * * of U(k,:). * * ============================================= */ #if ( PROFlevel>=1 ) TIC (t1); #endif if (recv_req[0] != MPI_REQUEST_NULL) { MPI_Wait (&recv_req[0], &status); MPI_Get_count (&status, mpi_int_t, &msgcnt[0]); recv_req[0] = MPI_REQUEST_NULL; } else { msgcnt[0] = msgcntsU[look_id][0]; #if (DEBUGlevel>=2) printf("\t[%d] k=%d, look_id=%d, recv_req[0] == MPI_REQUEST_NULL, msgcnt[0] = %d\n", iam, k, look_id, msgcnt[0]); #endif } if (recv_req[1] != MPI_REQUEST_NULL) { MPI_Wait (&recv_req[1], &status); MPI_Get_count (&status, MPI_DOUBLE, &msgcnt[1]); recv_req[1] = MPI_REQUEST_NULL; } else { msgcnt[1] = msgcntsU[look_id][1]; #if (DEBUGlevel>=2) printf("\t[%d] k=%d, look_id=%d, recv_req[1] == MPI_REQUEST_NULL, msgcnt[1] = %d\n", iam, k, look_id, msgcnt[1]); #endif } #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif #if ( DEBUGlevel>=2 ) printf("[%d] Recv L(:,%4d): #lsub %4d, #lusup %4d from Pc %2d\n", iam, k, msgcnt[0], msgcnt[1], kcol); fflush (stdout); #endif #if ( PRNTlevel==3 ) ++total_msg; if (!msgcnt[0]) ++zero_msg; #endif } else { msgcnt[0] = 0; } lsub = Lsub_buf_2[look_id]; lusup = Lval_buf_2[look_id]; } /* else if mycol = Pc(k) */ /* stat->time1 += SuperLU_timer_()-tt1; */ scp = &grid->cscp; /* The scope of process column. */ /* tt1 = SuperLU_timer_(); */ if (myrow == krow) { /* I own U(k,:) */ lk = LBi (k, grid); usub = Ufstnz_br_ptr[lk]; uval = Unzval_br_ptr[lk]; if (factoredU[k0] == -1) { /* Parallel triangular solve across process row *krow* -- U(k,j) = L(k,k) \ A(k,j). 
*/ double ttt2 = SuperLU_timer_(); #ifdef _OPENMP /* #pragma omp parallel */ /* Sherry -- parallel done inside pdgstrs2 */ #endif { PDGSTRS2 (k0, k, Glu_persist, grid, Llu, stat); } pdgstrs2_timer += SuperLU_timer_() - ttt2; /* Sherry -- need to set factoredU[k0] = 1; ?? */ /* Multicasts U(k,:) along process columns. */ if ( usub ) { msgcnt[2] = usub[2]; /* metadata size */ msgcnt[3] = usub[1]; /* Uval[] size */ } else { msgcnt[2] = msgcnt[3] = 0; } if (ToSendD[lk] == YES) { for (pi = 0; pi < Pr; ++pi) { if (pi != myrow) { /* Matching recv was pre-posted before */ #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Send (usub, msgcnt[2], mpi_int_t, pi, SLU_MPI_TAG (2, k0), /* (4*k0+2)%tag_ub */ scp->comm); MPI_Send (uval, msgcnt[3], MPI_DOUBLE, pi, SLU_MPI_TAG (3, k0), /* (4*k0+3)%tag_ub */ scp->comm); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; msg_cnt += 2; msg_vol += msgcnt[2] * iword + msgcnt[3] * dword; #endif #if ( DEBUGlevel>=2 ) printf ("[%d] Send U(%4d,:) down to Pr %2d\n", iam, k, pi); #endif } /* if pi ... */ } /* for pi ... */ } /* if ToSendD ... */ } else { /* Panel U(k,:) already factorized from previous look-ahead */ /* ================================================ * * Wait for downward sending of U(k,:) to complete * * for outer-product update. * * ================================================ */ if (ToSendD[lk] == YES) { #if ( PROFlevel>=1 ) TIC (t1); #endif for (pi = 0; pi < Pr; ++pi) { if (pi != myrow) { MPI_Wait (&send_reqs_u[look_id][pi], &status); MPI_Wait (&send_reqs_u[look_id][pi + Pr], &status); } } #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; #endif } msgcnt[2] = msgcntsU[look_id][2]; msgcnt[3] = msgcntsU[look_id][3]; } /* stat->time2 += SuperLU_timer_()-tt1; */ } else { /* myrow != krow */ /* ========================================== * * Wait for U(k,:) for outer-product updates. 
* * ========================================== */ if (ToRecv[k] == 2) { /* Recv block row U(k,:). */ #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Wait (&recv_reqs_u[look_id][0], &status); MPI_Get_count (&status, mpi_int_t, &msgcnt[2]); MPI_Wait (&recv_reqs_u[look_id][1], &status); MPI_Get_count (&status, MPI_DOUBLE, &msgcnt[3]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; #endif usub = Usub_buf; uval = Uval_buf; #if ( DEBUGlevel>=2 ) printf ("[%d] Recv U(%4d,:) from Pr %2d\n", iam, k, krow); #endif #if ( PRNTlevel==3 ) ++total_msg; if (!msgcnt[2]) ++zero_msg; #endif } else { msgcnt[2] = 0; } /* stat->time6 += SuperLU_timer_()-tt1; */ } /* end if myrow == Pr(k) */ /* * Parallel rank-k update; pair up blocks L(i,k) and U(k,j). * for (j = k+1; k < N; ++k) { * for (i = k+1; i < N; ++i) * if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid ) * && L(i,k) != 0 && U(k,j) != 0 ) * A(i,j) = A(i,j) - L(i,k) * U(k,j); */ msg0 = msgcnt[0]; msg2 = msgcnt[2]; /* tt1 = SuperLU_timer_(); */ if (msg0 && msg2) { /* L(:,k) and U(k,:) are not empty. */ nsupr = lsub[1]; /* LDA of lusup. */ if (myrow == krow) { /* Skip diagonal block L(k,k). */ lptr0 = BC_HEADER + LB_DESCRIPTOR + lsub[BC_HEADER + 1]; luptr0 = knsupc; nlb = lsub[0] - 1; } else { lptr0 = BC_HEADER; luptr0 = 0; nlb = lsub[0]; } iukp = BR_HEADER; /* Skip header; Pointer to index[] of U(k,:) */ rukp = 0; /* Pointer to nzval[] of U(k,:) */ nub = usub[0]; /* Number of blocks in the block row U(k,:) */ klst = FstBlockC (k + 1); /* ------------------------------------------------------------- Update the look-ahead block columns A(:,k+1:k+num_look_ahead) ------------------------------------------------------------- */ iukp0 = iukp; rukp0 = rukp; /* reorder the remaining columns in bottome-up */ /* TAU_STATIC_TIMER_START("LOOK_AHEAD_UPDATE"); */ for (jj = 0; jj < nub; jj++) { #ifdef ISORT iperm_u[jj] = iperm_c_supno[usub[iukp]]; /* Global block number of block U(k,j). 
*/ perm_u[jj] = jj; #else perm_u[2 * jj] = iperm_c_supno[usub[iukp]]; /* Global block number of block U(k,j). */ perm_u[2 * jj + 1] = jj; #endif jb = usub[iukp]; /* Global block number of block U(k,j). */ nsupc = SuperSize (jb); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ iukp += nsupc; } iukp = iukp0; #ifdef ISORT isort (nub, iperm_u, perm_u); #else qsort (perm_u, (size_t) nub, 2 * sizeof (int_t), &superlu_sort_perm); #endif j = jj0 = 0; /************************************************************************/ #if 0 for (jj = 0; jj < nub; ++jj) assert(perm_u[jj] == jj); /* Sherry */ #endif double ttx =SuperLU_timer_(); //#include "dlook_ahead_update_v4.c" #include "dlook_ahead_update.c" lookaheadupdatetimer += SuperLU_timer_() - ttx; /************************************************************************/ /*ifdef OMP_LOOK_AHEAD */ /* TAU_STATIC_TIMER_STOP("LOOK_AHEAD_UPDATE"); */ } /* if L(:,k) and U(k,:) not empty */ /* stat->time3 += SuperLU_timer_()-tt1; */ /* ================== */ /* == post receive == */ /* ================== */ kk1 = SUPERLU_MIN (k0 + num_look_aheads, nsupers - 1); for (kk0 = k0 + 1; kk0 <= kk1; kk0++) { kk = perm_c_supno[kk0]; kcol = PCOL (kk, grid); if (look_ahead[kk] == k0) { if (mycol != kcol) { if (ToRecv[kk] >= 1) { scp = &grid->rscp; /* The scope of process row. */ look_id = kk0 % (1 + num_look_aheads); recv_req = recv_reqs[look_id]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Lsub_buf_2[look_id], Llu->bufmax[0], mpi_int_t, kcol, SLU_MPI_TAG (0, kk0), /* (4*kk0)%tag_ub */ scp->comm, &recv_req[0]); MPI_Irecv (Lval_buf_2[look_id], Llu->bufmax[1], MPI_DOUBLE, kcol, SLU_MPI_TAG (1, kk0), /* (4*kk0+1)%tag_ub */ scp->comm, &recv_req[1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif } } else { lk = LBj (kk, grid); /* Local block number. 
*/ lsub1 = Lrowind_bc_ptr[lk]; lusup1 = Lnzval_bc_ptr[lk]; if (factored[kk] == -1) { /* Factor diagonal and subdiagonal blocks and test for exact singularity. */ factored[kk] = 0; /* flag column kk as factored */ double ttt1 = SuperLU_timer_(); PDGSTRF2 (options, kk0, kk, thresh, Glu_persist, grid, Llu, U_diag_blk_send_req, tag_ub, stat, info); pdgstrf2_timer += SuperLU_timer_() - ttt1; /* Process column *kcol+1* multicasts numeric values of L(:,k+1) to process rows. */ look_id = kk0 % (1 + num_look_aheads); send_req = send_reqs[look_id]; msgcnt = msgcnts[look_id]; if (lsub1) { msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0] * LB_DESCRIPTOR; msgcnt[1] = lsub1[1] * SuperSize (kk); } else { msgcnt[0] = 0; msgcnt[1] = 0; } scp = &grid->rscp; /* The scope of process row. */ for (pj = 0; pj < Pc; ++pj) { if (ToSendR[lk][pj] != EMPTY) { #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Isend (lsub1, msgcnt[0], mpi_int_t, pj, SLU_MPI_TAG (0, kk0), /* (4*kk0)%tag_ub */ scp->comm, &send_req[pj]); MPI_Isend (lusup1, msgcnt[1], MPI_DOUBLE, pj, SLU_MPI_TAG (1, kk0), /* (4*kk0+1)%tag_ub */ scp->comm, &send_req[pj + Pc]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; ++prof_sendR[lk]; #endif } } /* end for pj ... */ } /* if factored[kk] ... */ } } } double tsch = SuperLU_timer_(); /*******************************************************************/ #ifdef GPU_ACC #include "dSchCompUdt-cuda.c" #else /*#include "SchCompUdt--Phi-2Ddynamic-alt.c"*/ //#include "dSchCompUdt-2Ddynamic_v6.c" #include "dSchCompUdt-2Ddynamic.c" #endif /*uncomment following to compare against SuperLU 3.3 baseline*/ /* #include "SchCompUdt--baseline.c" */ /************************************************************************/ NetSchurUpTimer += SuperLU_timer_() - tsch; } /* MAIN LOOP for k0 = 0, ... */ /* ################################################################## ** END MAIN LOOP: for k0 = ... 
################################################################## */ pxgstrfTimer = SuperLU_timer_() - pxgstrfTimer; #if ( PRNTlevel>=1 ) /* Print detailed statistics */ /* Updating total flops */ double allflops; MPI_Reduce(&RemainGEMM_flops, &allflops, 1, MPI_DOUBLE, MPI_SUM, 0, grid->comm); if ( iam==0 ) { printf("\nInitialization time\t%8.2lf seconds\n" "\t Serial: compute static schedule, allocate storage\n", InitTimer); printf("\n==== Time breakdown in factorization (rank 0) ====\n"); printf("Panel factorization \t %8.2lf seconds\n", pdgstrf2_timer + pdgstrs2_timer); printf(".. L-panel pxgstrf2 \t %8.2lf seconds\n", pdgstrf2_timer); printf(".. U-panel pxgstrs2 \t %8.2lf seconds\n", pdgstrs2_timer); printf("Time in Look-ahead update \t %8.2lf seconds\n", lookaheadupdatetimer); printf("Time in Schur update \t\t %8.2lf seconds\n", NetSchurUpTimer); printf(".. Time to Gather L buffer\t %8.2lf (Separate L panel by Lookahead/Remain)\n", GatherLTimer); printf(".. Time to Gather U buffer\t %8.2lf \n", GatherUTimer); printf(".. Time in GEMM %8.2lf \n", LookAheadGEMMTimer + RemainGEMMTimer); printf("\t* Look-ahead\t %8.2lf \n", LookAheadGEMMTimer); printf("\t* Remain\t %8.2lf\tFlops %8.2le\tGflops %8.2lf\n", RemainGEMMTimer, allflops, allflops/RemainGEMMTimer*1e-9); printf(".. 
Time to Scatter %8.2lf \n", LookAheadScatterTimer + RemainScatterTimer); printf("\t* Look-ahead\t %8.2lf \n", LookAheadScatterTimer); printf("\t* Remain\t %8.2lf \n", RemainScatterTimer); printf("Total factorization time \t: %8.2lf seconds, \n", pxgstrfTimer); printf("--------\n"); printf("GEMM maximum block: %d-%d-%d\n", gemm_max_m, gemm_max_k, gemm_max_n); } #endif #if ( DEBUGlevel>=3 ) for (i = 0; i < Pr * Pc; ++i) { if (iam == i) { dPrintLblocks(iam, nsupers, grid, Glu_persist, Llu); dPrintUblocks(iam, nsupers, grid, Glu_persist, Llu); printf ("(%d)\n", iam); PrintInt10 ("Recv", nsupers, Llu->ToRecv); } MPI_Barrier (grid->comm); } #endif /******************************************************** * Free memory * ********************************************************/ if (Pr * Pc > 1) { SUPERLU_FREE (Lsub_buf_2[0]); /* also free Lsub_buf_2[1] */ SUPERLU_FREE (Lval_buf_2[0]); /* also free Lval_buf_2[1] */ if (Llu->bufmax[2] != 0) SUPERLU_FREE (Usub_buf_2[0]); if (Llu->bufmax[3] != 0) SUPERLU_FREE (Uval_buf_2[0]); if (U_diag_blk_send_req[myrow] != MPI_REQUEST_NULL) { /* wait for last Isend requests to complete, deallocate objects */ for (krow = 0; krow < Pr; ++krow) { if (krow != myrow) MPI_Wait (U_diag_blk_send_req + krow, &status); } } SUPERLU_FREE (U_diag_blk_send_req); } log_memory( -((Llu->bufmax[0] + Llu->bufmax[2]) * (num_look_aheads + 1) * iword + (Llu->bufmax[1] + Llu->bufmax[3]) * (num_look_aheads + 1) * dword), stat ); SUPERLU_FREE (Lsub_buf_2); SUPERLU_FREE (Lval_buf_2); SUPERLU_FREE (Usub_buf_2); SUPERLU_FREE (Uval_buf_2); SUPERLU_FREE (perm_c_supno); SUPERLU_FREE (perm_u); #ifdef ISORT SUPERLU_FREE (iperm_u); #endif SUPERLU_FREE (look_ahead); SUPERLU_FREE (factoredU); SUPERLU_FREE (factored); log_memory(-(6 * nsupers * iword), stat); for (i = 0; i <= num_look_aheads; i++) { SUPERLU_FREE (msgcnts[i]); SUPERLU_FREE (msgcntsU[i]); } SUPERLU_FREE (msgcnts); SUPERLU_FREE (msgcntsU); for (i = 0; i <= num_look_aheads; i++) { SUPERLU_FREE (send_reqs_u[i]); 
SUPERLU_FREE (recv_reqs_u[i]); SUPERLU_FREE (send_reqs[i]); SUPERLU_FREE (recv_reqs[i]); } SUPERLU_FREE (recv_reqs_u); SUPERLU_FREE (send_reqs_u); SUPERLU_FREE (recv_reqs); SUPERLU_FREE (send_reqs); #ifdef GPU_ACC checkCuda (cudaFreeHost (bigV)); checkCuda (cudaFreeHost (bigU)); cudaFree( (void*)dA ); /* Sherry added */ cudaFree( (void*)dB ); cudaFree( (void*)dC ); SUPERLU_FREE( handle ); SUPERLU_FREE( streams ); SUPERLU_FREE( stream_end_col ); #else #ifdef __INTEL_COMPILER _mm_free (bigU); _mm_free (bigV); #else SUPERLU_FREE (bigV); SUPERLU_FREE (bigU); #endif /* Decrement freed memory from memory stat. */ log_memory(-(bigv_size + bigu_size) * dword, stat); #endif SUPERLU_FREE (Llu->ujrow); // SUPERLU_FREE (tempv2d);/* Sherry */ SUPERLU_FREE (indirect); SUPERLU_FREE (indirect2); /* Sherry added */ SUPERLU_FREE (iuip); SUPERLU_FREE (ruip); ldt = sp_ienv_dist(3); log_memory( -(3 * ldt *ldt * dword + 2 * ldt * num_threads * iword + 2 * k * iword), stat ); /* Sherry added */ SUPERLU_FREE(omp_loop_time); SUPERLU_FREE(full_u_cols); SUPERLU_FREE(blk_ldu); #if ( PRNTlevel>=1 ) log_memory(-2 * ncb * dword, stat); #endif SUPERLU_FREE(lookAheadFullRow); SUPERLU_FREE(lookAheadStRow); SUPERLU_FREE(lookAhead_lptr); SUPERLU_FREE(lookAhead_ib); SUPERLU_FREE(RemainFullRow); SUPERLU_FREE(RemainStRow); SUPERLU_FREE(Remain_lptr); SUPERLU_FREE(Remain_ib); SUPERLU_FREE(Remain_info); SUPERLU_FREE(lookAhead_L_buff); SUPERLU_FREE(Remain_L_buff); log_memory( -(4 * mrb * iword + mrb * sizeof(Remain_info_t) + ldt * ldt * (num_look_aheads + 1) * dword + Llu->bufmax[1] * dword), stat ); SUPERLU_FREE(Ublock_info); SUPERLU_FREE(Ublock_info_iukp); SUPERLU_FREE(Ublock_info_rukp); SUPERLU_FREE(Ublock_info_jb); #if ( PROFlevel>=1 ) TIC (t1); #endif /* Prepare error message - find the smallesr index i that U(i,i)==0 */ if ( *info == 0 ) *info = n + 1; MPI_Allreduce (info, &iinfo, 1, MPI_INT, MPI_MIN, grid->comm); if ( iinfo == n + 1 ) *info = 0; else *info = iinfo; #if ( PROFlevel>=1 ) TOC (t2, t1); 
stat->utime[COMM] += t2; { float msg_vol_max, msg_vol_sum, msg_cnt_max, msg_cnt_sum; MPI_Reduce (&msg_cnt, &msg_cnt_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm); MPI_Reduce (&msg_cnt, &msg_cnt_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm); MPI_Reduce (&msg_vol, &msg_vol_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm); MPI_Reduce (&msg_vol, &msg_vol_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm); if ( iam==0 ) { printf ("\tPDGSTRF comm stat:" "\tAvg\tMax\t\tAvg\tMax\n" "\t\t\tCount:\t%.0f\t%.0f\tVol(MB)\t%.2f\t%.2f\n", msg_cnt_sum / Pr / Pc, msg_cnt_max, msg_vol_sum / Pr / Pc * 1e-6, msg_vol_max * 1e-6); printf("\t\tcomm time on task 0: %8.2lf\n" "\t\t\tcomm down DIAG block %8.2lf\n" "\t\t\tcomm right L panel %8.2lf\n" "\t\t\tcomm down U panel %8.2lf\n", stat->utime[COMM], stat->utime[COMM_DIAG], stat->utime[COMM_RIGHT], stat->utime[COMM_DOWN]); //#include //int Digs = DECIMAL_DIG; printf("gemm_count %d\n", gemm_count); for (i = 0; i < gemm_count; ++i) fprintf(fgemm, "%8d%8d%8d\t %20.16e\t%8d\n", gemm_stats[i].m, gemm_stats[i].n, gemm_stats[i].k, gemm_stats[i].microseconds, prof_sendR[i]); fclose(fgemm); } SUPERLU_FREE(gemm_stats); SUPERLU_FREE(prof_sendR); } #endif #if ( PRNTlevel==3 ) MPI_Allreduce (&zero_msg, &iinfo, 1, MPI_INT, MPI_SUM, grid->comm); if (!iam) printf (".. # msg of zero size\t%d\n", iinfo); MPI_Allreduce (&total_msg, &iinfo, 1, MPI_INT, MPI_SUM, grid->comm); if (!iam) printf (".. 
# total msg\t%d\n", iinfo); #endif #if ( DEBUGlevel>=3 ) for (i = 0; i < Pr * Pc; ++i) { if (iam == i) { dPrintLblocks (iam, nsupers, grid, Glu_persist, Llu); dPrintUblocks (iam, nsupers, grid, Glu_persist, Llu); printf ("(%d)\n", iam); PrintInt10 ("Recv", nsupers, Llu->ToRecv); } MPI_Barrier (grid->comm); } #endif #if ( DEBUGlevel>=3 ) printf ("(%d) num_copy=%d, num_update=%d\n", iam, num_copy, num_update); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC (iam, "Exit pdgstrf()"); #endif return 0; } /* PDGSTRF */ SuperLU_DIST_5.3.0/SRC/pdgstrs.c0000644013363400111340000012223513233431301015122 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of distributed linear equations A*X = B with a * general N-by-N matrix A using the LU factors computed previously. * *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 * 
*/

#include "superlu_ddefs.h"

/*
 * Sketch of the algorithm for L-solve:
 * =======================
 *
 * Self-scheduling loop:
 *
 *   while ( not finished ) { .. use message counter to control
 *
 *      receive a message;
 *
 *      if ( message is Xk ) {
 *          perform local block modifications into lsum[];
 *                 lsum[i] -= L_i,k * X[k]
 *          if all local updates done, Isend lsum[] to diagonal process;
 *
 *      } else if ( message is LSUM ) { .. this must be a diagonal process
 *          accumulate LSUM;
 *          if ( all LSUM are received ) {
 *              perform triangular solve for Xi;
 *              Isend Xi down to the current process column;
 *              perform local block modifications into lsum[];
 *          }
 *      }
 *   }
 *
 *
 * Auxiliary data structures: lsum[] / ilsum (pointer to lsum array)
 * =======================
 *
 * lsum[] array (local)
 *   + lsum has "nrhs" columns, row-wise it is partitioned by supernodes
 *   + stored by row blocks, column-wise storage within a row block
 *   + prepend a header recording the global block number.
 *
 *       lsum[]                              ilsum[nsupers + 1]
 *
 *         -----
 *         | | |  <- header of size 2     ---
 *         --------- <--------------------| |
 *         | | | | |                      ---
 *         | | | | |          |-----------| |
 *         | | | | |          |           ---
 *         ---------          |   |-------| |
 *         | | |  <- header   |   |       ---
 *         --------- <--------|   |  |----| |
 *         | | | | |              |  |    ---
 *         | | | | |              |  |
 *         | | | | |              |  |
 *         ---------              |  |
 *         | | |  <- header       |  |
 *         --------- <------------|  |
 *         | | | | |                 |
 *         | | | | |                 |
 *         | | | | |                 |
 *         --------- <---------------|
 */

/*#define ISEND_IRECV*/

/*
 * Function prototypes
 *
 * Cray builds only: Fortran-linkage declaration of STRSM and the three
 * Fortran character descriptors handed to it.  pdgstrs() fills the
 * descriptors through _cptofcd() before its first STRSM call.
 */
#ifdef _CRAY
fortran void STRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, double*, double*,
                   int*, double*, int*);
_fcd ftcs1;
_fcd ftcs2;
_fcd ftcs3;
#endif

/*! \brief
 *
 *
 * Purpose
 * =======
 *   Re-distribute B on the diagonal processes of the 2D process mesh.
 * 
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 *
 * Arguments
 * =========
 * 
 * B      (input) double*
 *        The distributed right-hand side matrix of the possibly
 *        equilibrated system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ilsum  (input) int* (global)
 *        Starting position of each supernode in a full array.
 *
 * x      (output) double*
 *        The solution vector. It is valid only on the diagonal processes.
 *
 * ScalePermstruct (input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
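 *
 * Glu_persist (input) Glu_persist_t*
 *        Supplies the xsup[] and supno[] arrays read by this routine.
 *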
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * SOLVEstruct (input) SOLVEstruct_t*
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * Return value
 * ============
 *   0 on completion (allocation failures are reported through ABORT).
 *
*/

int_t
pdReDistribute_B_to_X(double *B, int_t m_loc, int nrhs, int_t ldb,
                      int_t fst_row, int_t *ilsum, double *x,
                      ScalePermstruct_t *ScalePermstruct,
                      Glu_persist_t *Glu_persist,
                      gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct)
{
    /* NOTE(review): nrhs, ilsum, xsup, supno and grid keep these exact names
       because the BlockNum / SuperSize / FstBlockC / LBi / X_BLK /
       RHS_ITERATE macros presumably refer to them by name -- confirm
       against the project headers before renaming any of them. */
    pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdReDistribute_B_to_X()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    int_t *perm_r = ScalePermstruct->perm_r;   /* row permutation vector */
    int_t *perm_c = ScalePermstruct->perm_c;   /* column permutation vector */
    int    procs  = grid->nprow * grid->npcol;
    int_t *xsup   = Glu_persist->xsup;
    int_t *supno  = Glu_persist->supno;

    /* Eight consecutive slices, each of length procs, of one table. */
    int *counts       = gstrs_comm->B_to_X_SendCnt;
    int *SendCnt      = counts;
    int *SendCnt_nrhs = counts +   procs;
    int *RecvCnt      = counts + 2*procs;
    int *RecvCnt_nrhs = counts + 3*procs;
    int *sdispls      = counts + 4*procs;
    int *sdispls_nrhs = counts + 5*procs;
    int *rdispls      = counts + 6*procs;
    int *rdispls_nrhs = counts + 7*procs;

    /* Per-destination write cursors; these arrays live in gstrs_comm and
       are overwritten by this call. */
    int *ptr_to_ibuf = gstrs_comm->ptr_to_ibuf;
    int *ptr_to_dbuf = gstrs_comm->ptr_to_dbuf;

    /* ------------------------------------------------------------
       NOW COMMUNICATE THE ACTUAL DATA.
       ------------------------------------------------------------*/
    int_t nsend = sdispls[procs-1] + SendCnt[procs-1]; /* total # of sends */
    int_t nrecv = rdispls[procs-1] + RecvCnt[procs-1]; /* total # of recvs */

    /* One allocation per element type: send part first, receive part
       right behind it. */
    int_t *send_ibuf = intMalloc_dist(nsend + nrecv);
    if ( !send_ibuf ) ABORT("Malloc fails for send_ibuf[].");
    int_t *recv_ibuf = send_ibuf + nsend;

    double *send_dbuf = doubleMalloc_dist((nsend + nrecv) * (size_t)nrhs);
    if ( !send_dbuf ) ABORT("Malloc fails for send_dbuf[].");
    double *recv_dbuf = send_dbuf + nsend * nrhs;

    int   p;
    int_t i, j;

    for (p = 0; p < procs; ++p) {
        ptr_to_ibuf[p] = sdispls[p];
        ptr_to_dbuf[p] = sdispls[p] * nrhs;
    }

    /* Copy the row indices and values to the send buffer. */
    for (i = 0; i < m_loc; ++i) {
        int_t   irow = perm_c[perm_r[fst_row + i]]; /* Row number in Pc*Pr*B */
        int_t   gbi  = BlockNum( irow );
        double *dst;

        p = PNUM( PROW(gbi,grid), PCOL(gbi,grid), grid ); /* Diagonal process */

        send_ibuf[ptr_to_ibuf[p]] = irow;

        dst = send_dbuf + ptr_to_dbuf[p];
        RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */
            *dst++ = B[i + j*ldb];
        }

        ptr_to_ibuf[p] += 1;
        ptr_to_dbuf[p] += nrhs;
    }

    /* Communicate the (permuted) row indices. */
    MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t,
                  recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm);

    /* Communicate the numerical values. */
    MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, MPI_DOUBLE,
                  recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, MPI_DOUBLE,
                  grid->comm);

    /* ------------------------------------------------------------
       Copy buffer into X on the diagonal processes.
       ------------------------------------------------------------*/
    {
        /* Received row indices are consumed strictly in order, starting
           at recv_ibuf[0]; values restart at rdispls_nrhs[p] per source. */
        const int_t *next_row = recv_ibuf;

        for (p = 0; p < procs; ++p) {
            const double *src = recv_dbuf + rdispls_nrhs[p];

            /* Only the diagonal processes do this; the off-diagonal
               processes have 0 RecvCnt. */
            for (i = 0; i < RecvCnt[p]; ++i) {
                int_t irow     = *next_row++;       /* The permuted row index. */
                int_t blk      = BlockNum( irow );
                int_t blk_rows = SuperSize( blk );
                int_t loc_blk  = LBi( blk, grid );  /* Local block number. */
                int_t x_off    = X_BLK( loc_blk );
                int_t rel_row  = irow - FstBlockC( blk ); /* Relative row number
                                                             in X-block */

                x[x_off - XK_H] = blk; /* Block number prepended in the header. */
                RHS_ITERATE(j) {
                    x[x_off + rel_row + j*blk_rows] = *src++;
                }
            }
        }
    }

    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pdReDistribute_B_to_X()");
#endif
    return 0;
} /* pdReDistribute_B_to_X */

/*! \brief
 *
 *
 * Purpose
 * =======
 *   Re-distribute X on the diagonal processes to B distributed on all
 *   the processes.
 *
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 * 
*/ int_t pdReDistribute_X_to_B(int_t n, double *B, int_t m_loc, int_t ldb, int_t fst_row, int_t nrhs, double *x, int_t *ilsum, ScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct) { int_t i, ii, irow, j, jj, k, knsupc, nsupers, l, lk; int_t *xsup, *supno; int *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs; int *sdispls, *rdispls, *sdispls_nrhs, *rdispls_nrhs; int *ptr_to_ibuf, *ptr_to_dbuf; int_t *send_ibuf, *recv_ibuf; double *send_dbuf, *recv_dbuf; int_t *row_to_proc = SOLVEstruct->row_to_proc; /* row-process mapping */ pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm; int iam, p, q, pkk, procs; int_t num_diag_procs, *diag_procs; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pdReDistribute_X_to_B()"); #endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = Glu_persist->supno[n-1] + 1; iam = grid->iam; procs = grid->nprow * grid->npcol; SendCnt = gstrs_comm->X_to_B_SendCnt; SendCnt_nrhs = gstrs_comm->X_to_B_SendCnt + procs; RecvCnt = gstrs_comm->X_to_B_SendCnt + 2*procs; RecvCnt_nrhs = gstrs_comm->X_to_B_SendCnt + 3*procs; sdispls = gstrs_comm->X_to_B_SendCnt + 4*procs; sdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 5*procs; rdispls = gstrs_comm->X_to_B_SendCnt + 6*procs; rdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 7*procs; ptr_to_ibuf = gstrs_comm->ptr_to_ibuf; ptr_to_dbuf = gstrs_comm->ptr_to_dbuf; k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */ l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */ if ( !(send_ibuf = intMalloc_dist(k + l)) ) ABORT("Malloc fails for send_ibuf[]."); recv_ibuf = send_ibuf + k; if ( !(send_dbuf = doubleMalloc_dist((k + l)*nrhs)) ) ABORT("Malloc fails for send_dbuf[]."); recv_dbuf = send_dbuf + k * nrhs; for (p = 0; p < procs; ++p) { ptr_to_ibuf[p] = sdispls[p]; 
ptr_to_dbuf[p] = sdispls_nrhs[p]; } num_diag_procs = SOLVEstruct->num_diag_procs; diag_procs = SOLVEstruct->diag_procs; for (p = 0; p < num_diag_procs; ++p) { /* For all diagonal processes. */ pkk = diag_procs[p]; if ( iam == pkk ) { for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number */ irow = FstBlockC( k ); l = X_BLK( lk ); for (i = 0; i < knsupc; ++i) { #if 0 ii = inv_perm_c[irow]; /* Apply X <== Pc'*Y */ #else ii = irow; #endif q = row_to_proc[ii]; jj = ptr_to_ibuf[q]; send_ibuf[jj] = ii; jj = ptr_to_dbuf[q]; RHS_ITERATE(j) { /* RHS stored in row major in buffer. */ send_dbuf[jj++] = x[l + i + j*knsupc]; } ++ptr_to_ibuf[q]; ptr_to_dbuf[q] += nrhs; ++irow; } } } } /* ------------------------------------------------------------ COMMUNICATE THE (PERMUTED) ROW INDICES AND NUMERICAL VALUES. ------------------------------------------------------------*/ MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t, recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm); MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, MPI_DOUBLE, recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, MPI_DOUBLE, grid->comm); /* ------------------------------------------------------------ COPY THE BUFFER INTO B. ------------------------------------------------------------*/ for (i = 0, k = 0; i < m_loc; ++i) { irow = recv_ibuf[i]; irow -= fst_row; /* Relative row number */ RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */ B[irow + j*ldb] = recv_dbuf[k++]; } } SUPERLU_FREE(send_ibuf); SUPERLU_FREE(send_dbuf); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Exit pdReDistribute_X_to_B()"); #endif return 0; } /* pdReDistribute_X_to_B */ /*! \brief * *
 * Purpose
 * =======
 *
 * PDGSTRS solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by PDGSTRF.
 * If the equilibration, and row and column permutations were performed,
 * the LU factorization was performed for A1 where
 *     A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 * and the linear system solved is
 *     A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and
 * the permutation to B1 by Pc*Pr is applied internally in this routine.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from PDGSTRF for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *        A may be scaled and permuted into A1, so that
 *        A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) double*
 *        On entry, the distributed right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *        On exit, the distributed solution matrix Y of the possibly
 *        equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X,
 *        and X is the solution of the original system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ldb    (input) int (local)
 *        The leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 * 
 * SOLVEstruct (input) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pdgstrs(int_t n, LUstruct_t *LUstruct, ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid, double *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, SOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; double alpha = 1.0; double zero = 0.0; double *lsum; /* Local running sum of the updates to B-components */ double *x; /* X component at step k. */ /* NOTE: x and lsum are of same size. */ double *lusup, *dest; double *recvbuf, *tempv; double *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */ Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */ int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */ int_t kcol, krow, mycol, myrow; int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *supno, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int Pc, Pr, iam; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; double **Lnzval_bc_ptr; MPI_Status status; MPI_Request *send_req, recv_req; pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm; /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve -- Count the number of local block products to be summed into lsum[lk]. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of lsum[lk] contributions to be received from processes in this row. It is only valid on the diagonal processes. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */ int_t nleaf = 0, nroot = 0; /*-- Counts used for U-solve --*/ int_t *bmod; /* Modification count for U-solve. 
*/ int_t **bsendx_plist = Llu->bsendx_plist; int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */ int_t *brecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. */ double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */ t = SuperLU_timer_(); /* Test input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -9; if ( *info ) { pxerr_dist("PDGSTRS", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = supno[n-1] + 1; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrs()"); #endif stat->ops[SOLVE] = 0.0; Llu->SolveMsgSent = 0; /* Save the count to be altered so it can be used by subsequent call to PDGSTRS. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb; if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Obtain ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. 
*/ knsupc = sp_ienv_dist(3); maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H ); if ( !(lsum = doubleCalloc_dist(((size_t)ldalsum)*nrhs + nlb*LSUM_H)) ) ABORT("Calloc fails for lsum[]."); if ( !(x = doubleCalloc_dist(ldalsum * nrhs + nlb * XK_H)) ) ABORT("Calloc fails for x[]."); if ( !(recvbuf = doubleMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doubleCalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. *---------------------------------------------------*/ /* Redistribute B into X on the diagonal processes. */ pdReDistribute_B_to_X(B, m_loc, nrhs, ldb, fst_row, ilsum, x, ScalePermstruct, Glu_persist, grid, SOLVEstruct); /* Set up the headers in lsum[]. */ ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H] = k; /* Block number prepended in the header. */ } ii += knsupc; } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol != kcol && fmod[lk] ) mod_bit[lk] = 1; /* contribution from off-diagonal */ } } /*PrintInt10("mod_bit", nlb, mod_bit);*/ #if ( PROFlevel>=2 ) t_reduce_tmp = SuperLU_timer_(); #endif /* Every process receives the count, but it is only useful on the diagonal processes. 
*/ MPI_Allreduce( mod_bit, frecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); #if ( PROFlevel>=2 ) t_reduce += SuperLU_timer_() - t_reduce_tmp; #endif for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* diagonal process */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && fmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) frecv[%4d] %2d\n", iam, k, frecv[lk]); assert( frecv[lk] < Pc ); #endif } } } #endif } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. --------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( frecv[lk]==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs; --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #if 0 MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } } /* if diagonal process ... */ } /* for k ... */ /* ----------------------------------------------------------- Compute the internal nodes asynchronously by all processes. ----------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. 
*/ MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ dlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: /* Receiver must be a diagonal process */ --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; } if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); MPI_Isend( &x[ii-XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #if 0 MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications. */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( DEBUGlevel==2 ) { printf("(%d) .. After L-solve: y =\n", iam); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); fflush(stdout); } MPI_Barrier( grid->comm ); } } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); /*for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]);*/ for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Wait(&send_req[i], &status); Llu->SolveMsgSent = 0; MPI_Barrier( grid->comm ); /*--------------------------------------------------- * Back solve Ux = y. * * The Y components from the forward solve is already * on the diagonal processes. *---------------------------------------------------*/ /* Save the count to be altered so it can be used by subsequent call to PDGSTRS. 
*/ if ( !(bmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for bmod[]."); for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i]; if ( !(brecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for brecv[]."); Llu->brecv = brecv; /* * Compute brecv[] and nbrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); /* root process in this row scope */ if ( mycol != kcol && bmod[lk] ) mod_bit[lk] = 1; /* Contribution from off-diagonal */ } } /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, brecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); /* root process in this row scope. */ if ( mycol == kcol ) { /* diagonal process */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #endif } /* Re-initialize lsum to zero. Each block header is already in place. 
*/ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { knsupc = SuperSize( k ); lk = LBi( k, grid ); il = LSUM_BLK( lk ); dest = &lsum[il]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = zero; } } } /* Set up additional pointers for the index and value arrays of U. nub is the number of local block columns. */ nub = CEILING( nsupers, Pc ); /* Number of local block columns. */ if ( !(Urbs = (int_t *) intCalloc_dist(2*nub)) ) ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero blocks in a block column. */ Urbs1 = Urbs + nub; if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) ) ABORT("Malloc fails for Ucb_indptr[]"); if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) ) ABORT("Malloc fails for Ucb_valptr[]"); /* Count number of row blocks in a block column. One pass of the skeleton graph of U. */ for (lk = 0; lk < nlb; ++lk) { usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ /* usub[0] -- number of column blocks in this block row. */ #if ( DEBUGlevel>=2 ) Ublocks += usub[0]; #endif i = BR_HEADER; /* Pointer in index array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number */ ++Urbs[LBj(k,grid)]; i += UB_DESCRIPTOR + SuperSize( k ); } } } /* Set up the vertical linked lists for the row blocks. One pass of the skeleton graph of U. */ for (lb = 0; lb < nub; ++lb) { if ( Urbs[lb] ) { /* Not an empty block column. */ if ( !(Ucb_indptr[lb] = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) ) ABORT("Malloc fails for Ucb_indptr[lb][]"); if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) ) ABORT("Malloc fails for Ucb_valptr[lb][]"); } } for (lk = 0; lk < nlb; ++lk) { /* For each block row. */ usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ i = BR_HEADER; /* Pointer in index array. */ j = 0; /* Pointer in nzval array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. 
*/ k = usub[i]; /* Global block number, column-wise. */ ljb = LBj( k, grid ); /* Local block number, column-wise. */ Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk; Ucb_indptr[ljb][Urbs1[ljb]].indpos = i; Ucb_valptr[ljb][Urbs1[ljb]] = j; ++Urbs1[ljb]; j += usub[i+1]; i += UB_DESCRIPTOR + SuperSize( k ); } } } #if ( DEBUGlevel>=2 ) for (p = 0; p < Pr*Pc; ++p) { if (iam == p) { printf("(%2d) .. Ublocks %d\n", iam, Ublocks); for (lb = 0; lb < nub; ++lb) { printf("(%2d) Local col %2d: # row blocks %2d\n", iam, lb, Urbs[lb]); if ( Urbs[lb] ) { for (i = 0; i < Urbs[lb]; ++i) printf("(%2d) .. row blk %2d:\ lbnum %d, indpos %d, valpos %d\n", iam, i, Ucb_indptr[lb][i].lbnum, Ucb_indptr[lb][i].indpos, Ucb_valptr[lb][i]); } } } MPI_Barrier( grid->comm ); } for (p = 0; p < Pr*Pc; ++p) { if ( iam == p ) { printf("\n(%d) bsendx_plist[][]", iam); for (lb = 0; lb < nub; ++lb) { printf("\n(%d) .. local col %2d: ", iam, lb); for (i = 0; i < Pr; ++i) printf("%4d", bsendx_plist[lb][i]); } printf("\n"); } MPI_Barrier( grid->comm ); } #endif /* DEBUGlevel */ #if ( PRNTlevel>=3 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Setup U-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif /* * Solve the roots first by all the diagonal processes. */ #if ( DEBUGlevel>=2 ) printf("(%2d) nroot %4d\n", iam, nroot); #endif for (k = nsupers-1; k >= 0 && nroot; --k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process. */ knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number, row-wise. */ if ( brecv[lk]==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. 
*/ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs; --nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #if 0 MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if root ... */ } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. */ while ( nbrecvx || nbrecvmod ) { /* While not finished. */ /* Receive a message. */ MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nbrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ dlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); break; case LSUM: /* Receiver must be a diagonal process */ --nbrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; } if ( (--brecv[lk])==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #if 0 MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii - XK_H], pi); #endif } } /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if becomes solvable */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=3 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. U-solve time\t%8.2f\n", t); #endif #if ( DEBUGlevel>=2 ) { double *x_col; int diag; printf("\n(%d) .. After U-solve: x (ON DIAG PROCS) = \n", iam); ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); kcol = PCOL( k, grid ); diag = PNUM( krow, kcol, grid); if ( iam == diag ) { /* Diagonal process. */ lk = LBi( k, grid ); jj = X_BLK( lk ); x_col = &x[jj]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) { /* X stored in blocks */ printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+i, x_col[i]); } x_col += knsupc; } } ii += knsupc; } /* for k ... */ } #endif pdReDistribute_X_to_B(n, B, m_loc, ldb, fst_row, nrhs, x, ilsum, ScalePermstruct, Glu_persist, grid, SOLVEstruct); /* Deallocate storage. 
*/ SUPERLU_FREE(lsum); SUPERLU_FREE(x); SUPERLU_FREE(recvbuf); for (i = 0; i < nub; ++i) { if ( Urbs[i] ) { SUPERLU_FREE(Ucb_indptr[i]); SUPERLU_FREE(Ucb_valptr[i]); } } SUPERLU_FREE(Ucb_indptr); SUPERLU_FREE(Ucb_valptr); SUPERLU_FREE(Urbs); SUPERLU_FREE(bmod); SUPERLU_FREE(brecv); /*for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]);*/ for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Wait(&send_req[i], &status); SUPERLU_FREE(send_req); MPI_Barrier( grid->comm ); stat->utime[SOLVE] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrs()"); #endif return; } /* PDGSTRS */ SuperLU_DIST_5.3.0/SRC/zutil_dist.c0000644013363400111340000003621613233431301015631 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Several matrix utilities * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 */

#include <math.h>
#include "superlu_zdefs.h"

/*! \brief Set up A as a compressed-column (NCformat) view of caller arrays.
 *
 * Records stype/dtype/mtype and the m-by-n dimensions in A, allocates the
 * NCformat descriptor kept in A->Store with SUPERLU_MALLOC, and makes that
 * descriptor reference nzval, rowind and colptr directly (the arrays are
 * not copied).  ABORT is invoked when the descriptor allocation yields NULL.
 */
void
zCreate_CompCol_Matrix_dist(SuperMatrix *A, int_t m, int_t n, int_t nnz, 
			    doublecomplex *nzval, int_t *rowind, int_t *colptr,
			    Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
    NCformat *desc;

    /* Matrix header. */
    A->nrow  = m;
    A->ncol  = n;
    A->Stype = stype;
    A->Dtype = dtype;
    A->Mtype = mtype;

    /* Storage descriptor; A->Store is assigned before the NULL test. */
    desc = (NCformat *) SUPERLU_MALLOC( sizeof(NCformat) );
    A->Store = (void *) desc;
    if ( !desc ) ABORT("SUPERLU_MALLOC fails for A->Store");

    /* Borrow the caller's arrays. */
    desc->colptr = colptr;
    desc->rowind = rowind;
    desc->nzval  = nzval;
    desc->nnz    = nnz;
}

/*! \brief Set up A as a locally-stored compressed-row (NRformat_loc) view.
 *
 * Records stype/dtype/mtype and the global m-by-n dimensions in A, allocates
 * the NRformat_loc descriptor kept in A->Store with SUPERLU_MALLOC, and fills
 * it with nnz_loc, m_loc, fst_row and the caller's nzval / colind / rowptr
 * arrays (referenced, not copied).  ABORT is invoked when the descriptor
 * allocation yields NULL.
 */
void
zCreate_CompRowLoc_Matrix_dist(SuperMatrix *A, int_t m, int_t n,
			       int_t nnz_loc, int_t m_loc, int_t fst_row,
			       doublecomplex *nzval, int_t *colind, int_t *rowptr,
			       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
    NRformat_loc *desc;

    /* Matrix header. */
    A->nrow  = m;
    A->ncol  = n;
    A->Stype = stype;
    A->Dtype = dtype;
    A->Mtype = mtype;

    /* Storage descriptor; A->Store is assigned before the NULL test. */
    desc = (NRformat_loc *) SUPERLU_MALLOC( sizeof(NRformat_loc) );
    A->Store = (void *) desc;
    if ( !desc ) ABORT("SUPERLU_MALLOC fails for A->Store");

    /* Local extent of the matrix. */
    desc->m_loc   = m_loc;
    desc->fst_row = fst_row;
    desc->nnz_loc = nnz_loc;

    /* Borrow the caller's arrays. */
    desc->rowptr = rowptr;
    desc->colind = colind;
    desc->nzval  = nzval;
}

/*! \brief Convert a row compressed storage into a column compressed storage.
 *
 * Input is the m-by-n matrix in compressed-row form (a, colind, rowptr)
 * holding nnz entries.  On return *at, *rowind and *colptr hold the same
 * entries in compressed-column form.  The three output arrays are allocated
 * here (doublecomplexMalloc_dist / intMalloc_dist) and are not released by
 * this routine.
 *
 * All indices are int_t, matching the element type of rowptr[] / colind[],
 * so entry positions are never narrowed to int.  Allocation failures are
 * reported through ABORT instead of being dereferenced.
 */
void
zCompRow_to_CompCol_dist(int_t m, int_t n, int_t nnz, 
                         doublecomplex *a, int_t *colind, int_t *rowptr,
                         doublecomplex **at, int_t **rowind, int_t **colptr)
{
    int_t i, j, col, relpos;
    int_t *marker;

    /* Allocate storage for another copy of the matrix. */
    *at = (doublecomplex *) doublecomplexMalloc_dist(nnz);
    *rowind = intMalloc_dist(nnz);
    *colptr = intMalloc_dist(n+1);
    marker = intCalloc_dist(n);

    /* A zero-sized request may legitimately come back NULL, so the value
       and marker arrays are only required when they are non-empty.
       colptr[0] is always written, hence it is always required. */
    if ( nnz > 0 && ( !(*at) || !(*rowind) ) )
	ABORT("Malloc fails for at[] / rowind[]");
    if ( !(*colptr) ) ABORT("Malloc fails for colptr[]");
    if ( n > 0 && !marker ) ABORT("Calloc fails for marker[]");
    
    /* Get counts of each column of A, and set up column pointers */
    for (i = 0; i < m; ++i)
	for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
    (*colptr)[0] = 0;
    for (j = 0; j < n; ++j) {
	(*colptr)[j+1] = (*colptr)[j] + marker[j];
	marker[j] = (*colptr)[j];   /* next free slot in column j */
    }

    /* Transfer the matrix into the compressed column storage. */
    for (i = 0; i < m; ++i) {
	for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
	    col = colind[j];
	    relpos = marker[col];
	    (*rowind)[relpos] = i;
	    (*at)[relpos] = a[j];
	    ++marker[col];
	}
    }

    SUPERLU_FREE(marker);
}

/*! \brief Copy matrix A into matrix B.
 *
 * Copies the type tags, the dimensions, nnz, and the contents of nzval,
 * rowind and colptr from A's NCformat store into B's.  Nothing is allocated
 * here: B->Store and the arrays it references are written through as they
 * are, so they must already exist and be large enough.
 *
 * Counts and indices are kept in int_t so that nnz / ncol are not truncated
 * when int_t is wider than int.
 */
void
zCopy_CompCol_Matrix_dist(SuperMatrix *A, SuperMatrix *B)
{
    NCformat *Astore, *Bstore;
    int_t    ncol, nnz, i;

    B->Stype = A->Stype;
    B->Dtype = A->Dtype;
    B->Mtype = A->Mtype;
    B->nrow  = A->nrow;
    B->ncol  = ncol = A->ncol;
    Astore   = (NCformat *) A->Store;
    Bstore   = (NCformat *) B->Store;
    Bstore->nnz = nnz = Astore->nnz;
    for (i = 0; i < nnz; ++i)
	((doublecomplex *)Bstore->nzval)[i] = ((doublecomplex *)Astore->nzval)[i];
    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
    /* colptr has ncol+1 entries. */
    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
}


/*! \brief Dump a compressed-column (NCformat) matrix to stdout.
 *
 * Prints the type tags, the dimensions and nnz, then the nonzero values
 * (skipped when nzval is NULL), the row indices, and the ncol+1 column
 * pointers.
 */
void zPrint_CompCol_Matrix_dist(SuperMatrix *A)
{
    NCformat      *store = (NCformat *) A->Store;
    doublecomplex *vals;
    int           pos;
    
    printf("\nCompCol matrix: ");
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    printf("nrow %lld, ncol %lld, nnz %lld\n", (long long) A->nrow,
	    (long long) A->ncol, (long long) store->nnz);

    /* Values: one "real<TAB>imag" pair per line. */
    vals = (doublecomplex *) store->nzval;
    if ( vals != NULL ) {
        printf("nzval:\n");
        pos = 0;
        while ( pos < store->nnz ) {
            printf("%f\t%f\n", vals[pos].r, vals[pos].i);
            ++pos;
        }
    }

    /* Row index of every stored entry. */
    printf("\nrowind:\n");
    pos = 0;
    while ( pos < store->nnz ) {
        printf("%lld  ", (long long) store->rowind[pos]);
        ++pos;
    }

    /* Column pointers, including the closing entry at index ncol. */
    printf("\ncolptr:\n");
    pos = 0;
    while ( pos <= A->ncol ) {
        printf("%lld  ", (long long) store->colptr[pos]);
        ++pos;
    }
    printf("\nend CompCol matrix.\n");
}

/*! \brief Dump a dense (DNformat) matrix to stdout.
 *
 * Prints the type tags, the dimensions and the leading dimension, then every
 * stored entry as a "real<TAB>imag" line.  Entries are visited column by
 * column using the leading dimension lda, i.e. element (i,j) is read from
 * nzval[i + j*lda], the same addressing zCopy_Dense_Matrix_dist uses.
 *
 * The previous version printed only the first nrow entries (column 0) no
 * matter how many columns the matrix had; for a single-column matrix the
 * output is unchanged.
 */
void zPrint_Dense_Matrix_dist(SuperMatrix *A)
{
    DNformat      *Astore;
    doublecomplex *dp;
    int_t         i, j, lda;
    
    printf("\nDense matrix: ");
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    Astore = (DNformat *) A->Store;
    dp = (doublecomplex *) Astore->nzval;
    lda = Astore->lda;
    printf("nrow %lld, ncol %lld, lda %lld\n", 
        (long long) A->nrow, (long long) A->ncol, (long long) lda);
    printf("\nnzval: ");
    for (j = 0; j < A->ncol; ++j) {
        const doublecomplex *colp = dp + j * lda;   /* start of column j */
        for (i = 0; i < A->nrow; ++i)
            printf("%f\t%f\n", colp[i].r, colp[i].i);
    }
    printf("\nend Dense matrix.\n");
}

/*! \brief Dump a locally-stored compressed-row (NRformat_loc) matrix to stdout.
 *
 * Prints the type tags, global dimensions, the local counts (nnz_loc, m_loc,
 * fst_row), then rowptr (m_loc+1 entries) and colind (nnz_loc entries) via
 * PrintInt10, and the values via PrintDoublecomplex unless nzval is NULL.
 *
 * \return always 0.
 */
int zPrint_CompRowLoc_Matrix_dist(SuperMatrix *A)
{
    NRformat_loc  *loc = (NRformat_loc *) A->Store;
    doublecomplex *vals;
    int_t         local_nnz, local_rows;
    
    printf("\n==== CompRowLoc matrix: ");
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    printf("nrow %ld, ncol %ld\n", 
            (long int) A->nrow, (long int) A->ncol);

    local_nnz  = loc->nnz_loc;
    local_rows = loc->m_loc;
    printf("nnz_loc %ld, m_loc %ld, fst_row %ld\n", (long int) local_nnz, 
            (long int) local_rows, (long int) loc->fst_row);

    /* Index arrays first, then the values when present. */
    PrintInt10("rowptr", local_rows + 1, loc->rowptr);
    PrintInt10("colind", local_nnz, loc->colind);
    vals = (doublecomplex *) loc->nzval;
    if ( vals != NULL ) {
        PrintDoublecomplex("nzval", local_nnz, vals);
    }

    printf("==== end CompRowLoc matrix\n");
    return 0;
}

/*! \brief Dump a locally-stored compressed-row (NRformat_loc) matrix to a stream.
 *
 * Same layout as zPrint_CompRowLoc_Matrix_dist, but written to fp: type
 * tags, global dimensions, local counts, rowptr (m_loc+1 entries) and colind
 * (nnz_loc entries) via file_PrintInt10, and the values via
 * file_PrintDoublecomplex unless nzval is NULL.
 *
 * \return always 0.
 */
int file_zPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A)
{
    NRformat_loc  *loc = (NRformat_loc *) A->Store;
    doublecomplex *vals;
    int_t         local_nnz, local_rows;
    
    fprintf(fp, "\n==== CompRowLoc matrix: ");
    fprintf(fp, "Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    fprintf(fp, "nrow %ld, ncol %ld\n", (long int) A->nrow, (long int) A->ncol);

    local_nnz  = loc->nnz_loc;
    local_rows = loc->m_loc;
    fprintf(fp, "nnz_loc %ld, m_loc %ld, fst_row %ld\n", (long int) local_nnz,
            (long int) local_rows, (long int) loc->fst_row);

    /* Index arrays first, then the values when present. */
    file_PrintInt10(fp, "rowptr", local_rows + 1, loc->rowptr);
    file_PrintInt10(fp, "colind", local_nnz, loc->colind);
    vals = (doublecomplex *) loc->nzval;
    if ( vals != NULL ) {
        file_PrintDoublecomplex(fp, "nzval", local_nnz, vals);
    }

    fprintf(fp, "==== end CompRowLoc matrix\n");
    return 0;
}

/*! \brief Set up X as a dense (DNformat) view of a caller-supplied array.
 *
 * Records stype/dtype/mtype and the m-by-n dimensions in X, allocates the
 * DNformat descriptor kept in X->Store with SUPERLU_MALLOC, and stores the
 * leading dimension ldx together with the pointer x (referenced, not
 * copied).  ABORT is invoked when the descriptor allocation yields NULL.
 */
void
zCreate_Dense_Matrix_dist(SuperMatrix *X, int_t m, int_t n, doublecomplex *x,
			  int_t ldx, Stype_t stype, Dtype_t dtype,
			  Mtype_t mtype)
{
    DNformat *desc;
    
    /* Matrix header. */
    X->nrow  = m;
    X->ncol  = n;
    X->Stype = stype;
    X->Dtype = dtype;
    X->Mtype = mtype;

    /* Storage descriptor; X->Store is assigned before the NULL test. */
    desc = (DNformat *) SUPERLU_MALLOC( sizeof(DNformat) );
    X->Store = (void *) desc;
    if ( !desc ) ABORT("SUPERLU_MALLOC fails for X->Store");

    desc->nzval = (doublecomplex *) x;
    desc->lda   = ldx;
}

void
zCopy_Dense_Matrix_dist(int_t M, int_t N, doublecomplex *X, int_t ldx,
			doublecomplex *Y, int_t ldy)
{
/*! \brief
 *
 * 
 *  Purpose
 *  =======
 *
 *  Copies a two-dimensional matrix X to another matrix Y.
 * 
*/ int i, j; for (j = 0; j < N; ++j) for (i = 0; i < M; ++i) Y[i + j*ldy] = X[i + j*ldx]; } void zCreate_SuperNode_Matrix_dist(SuperMatrix *L, int_t m, int_t n, int_t nnz, doublecomplex *nzval, int_t *nzval_colptr, int_t *rowind, int_t *rowind_colptr, int_t *col_to_sup, int_t *sup_to_col, Stype_t stype, Dtype_t dtype, Mtype_t mtype) { SCformat *Lstore; L->Stype = stype; L->Dtype = dtype; L->Mtype = mtype; L->nrow = m; L->ncol = n; L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ); if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store"); Lstore = L->Store; Lstore->nnz = nnz; Lstore->nsuper = col_to_sup[n]; Lstore->nzval = nzval; Lstore->nzval_colptr = nzval_colptr; Lstore->rowind = rowind; Lstore->rowind_colptr = rowind_colptr; Lstore->col_to_sup = col_to_sup; Lstore->sup_to_col = sup_to_col; } void zGenXtrue_dist(int_t n, int_t nrhs, doublecomplex *x, int_t ldx) { int i, j; for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) { if ( i % 2 ) x[i + j*ldx].r = 1.0; else x[i + j*ldx].r = 2.0; x[i + j*ldx].i = 0.0; } } /*! \brief Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's */ void zFillRHS_dist(char *trans, int_t nrhs, doublecomplex *x, int_t ldx, SuperMatrix *A, doublecomplex *rhs, int_t ldb) { doublecomplex one = {1.0, 0.0}; doublecomplex zero = {0.0, 0.0}; sp_zgemm_dist(trans, nrhs, one, A, x, ldx, zero, rhs, ldb); } /*! \brief Fills a doublecomplex precision array with a given value. */ void zfill_dist(doublecomplex *a, int_t alen, doublecomplex dval) { register int_t i; for (i = 0; i < alen; i++) a[i] = dval; } /*! 
\brief Check the inf-norm of the error vector */ void zinf_norm_error_dist(int_t n, int_t nrhs, doublecomplex *x, int_t ldx, doublecomplex *xtrue, int_t ldxtrue, gridinfo_t *grid) { double err, xnorm; doublecomplex *x_work, *xtrue_work; doublecomplex temp; int i, j; for (j = 0; j < nrhs; j++) { x_work = &x[j*ldx]; xtrue_work = &xtrue[j*ldxtrue]; err = xnorm = 0.0; for (i = 0; i < n; i++) { z_sub(&temp, &x_work[i], &xtrue_work[i]); err = SUPERLU_MAX(err, slud_z_abs(&temp)); xnorm = SUPERLU_MAX(xnorm, slud_z_abs(&x_work[i])); } err = err / xnorm; printf("\tRHS %2d: ||X-Xtrue||/||X|| = %e\n", j, err); } } void PrintDoublecomplex(char *name, int_t len, doublecomplex *x) { register int_t i; printf("%10s:\tReal\tImag\n", name); for (i = 0; i < len; ++i) printf("\t" IFMT "\t%.4f\t%.4f\n", i, x[i].r, x[i].i); } int file_PrintDoublecomplex(FILE *fp, char *name, int_t len, doublecomplex *x) { register int_t i; fprintf(fp, "%10s:\tReal\tImag\n", name); for (i = 0; i < len; ++i) fprintf(fp, "\t" IFMT "\t%.4f\t%.4f\n", i, x[i].r, x[i].i); return 0; } /*! \brief Print the blocks in the factored matrix L. 
*/ void zPrintLblocks(int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, LocalLU_t *Llu) { register int c, extra, gb, j, lb, nsupc, nsupr, len, nb, ncb; register int_t k, mycol, r; int_t *xsup = Glu_persist->xsup; int_t *index; doublecomplex *nzval; printf("\n[%d] L BLOCKS IN COLUMN-MAJOR ORDER -->\n", iam); ncb = nsupers / grid->npcol; extra = nsupers % grid->npcol; mycol = MYCOL( iam, grid ); if ( mycol < extra ) ++ncb; for (lb = 0; lb < ncb; ++lb) { index = Llu->Lrowind_bc_ptr[lb]; if ( index ) { /* Not an empty column */ nzval = Llu->Lnzval_bc_ptr[lb]; nb = index[0]; nsupr = index[1]; gb = lb * grid->npcol + mycol; nsupc = SuperSize( gb ); printf("[%d] block column %d (local # %d), nsupc %d, # row blocks %d\n", iam, gb, lb, nsupc, nb); for (c = 0, k = BC_HEADER, r = 0; c < nb; ++c) { len = index[k+1]; printf("[%d] row-block %d: block # " IFMT "\tlength %d\n", iam, c, index[k], len); PrintInt10("lsub", len, &index[k+LB_DESCRIPTOR]); for (j = 0; j < nsupc; ++j) { PrintDoublecomplex("nzval", len, &nzval[r + j*nsupr]); } k += LB_DESCRIPTOR + len; r += len; } } printf("(%d)", iam); PrintInt32("ToSendR[]", grid->npcol, Llu->ToSendR[lb]); PrintInt10("fsendx_plist[]", grid->nprow, Llu->fsendx_plist[lb]); } printf("nfrecvx " IFMT "\n", Llu->nfrecvx); k = CEILING( nsupers, grid->nprow ); PrintInt10("fmod", k, Llu->fmod); } /* ZPRINTLBLOCKS */ /*! \brief Print the blocks in the factored matrix U. 
*/ void zPrintUblocks(int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, LocalLU_t *Llu) { register int c, extra, jb, k, lb, len, nb, nrb, nsupc; register int_t myrow, r; int_t *xsup = Glu_persist->xsup; int_t *index; doublecomplex *nzval; printf("\n[%d] U BLOCKS IN ROW-MAJOR ORDER -->\n", iam); nrb = nsupers / grid->nprow; extra = nsupers % grid->nprow; myrow = MYROW( iam, grid ); if ( myrow < extra ) ++nrb; for (lb = 0; lb < nrb; ++lb) { index = Llu->Ufstnz_br_ptr[lb]; if ( index ) { /* Not an empty row */ nzval = Llu->Unzval_br_ptr[lb]; nb = index[0]; printf("[%d] block row " IFMT " (local # %d), # column blocks %d\n", iam, lb*grid->nprow+myrow, lb, nb); r = 0; for (c = 0, k = BR_HEADER; c < nb; ++c) { jb = index[k]; len = index[k+1]; printf("[%d] col-block %d: block # %d\tlength " IFMT "\n", iam, c, jb, index[k+1]); nsupc = SuperSize( jb ); PrintInt10("fstnz", nsupc, &index[k+UB_DESCRIPTOR]); PrintDoublecomplex("nzval", len, &nzval[r]); k += UB_DESCRIPTOR + nsupc; r += len; } printf("[%d] ToSendD[] %d\n", iam, Llu->ToSendD[lb]); } } } /* ZPRINTUBLOCKS */ int zprint_gsmv_comm(FILE *fp, int_t m_loc, pzgsmv_comm_t *gsmv_comm, gridinfo_t *grid) { int_t procs = grid->nprow*grid->npcol; fprintf(fp, "TotalIndSend " IFMT "\tTotalValSend " IFMT "\n", gsmv_comm->TotalIndSend, gsmv_comm->TotalValSend); file_PrintInt10(fp, "extern_start", m_loc, gsmv_comm->extern_start); file_PrintInt10(fp, "ind_tosend", gsmv_comm->TotalIndSend, gsmv_comm->ind_tosend); file_PrintInt10(fp, "ind_torecv", gsmv_comm->TotalValSend, gsmv_comm->ind_torecv); file_PrintInt10(fp, "ptr_ind_tosend", procs+1, gsmv_comm->ptr_ind_tosend); file_PrintInt10(fp, "ptr_ind_torecv", procs+1, gsmv_comm->ptr_ind_torecv); file_PrintInt32(fp, "SendCounts", procs, gsmv_comm->SendCounts); file_PrintInt32(fp, "RecvCounts", procs, gsmv_comm->RecvCounts); return 0; } /* cg5.cua b = A*x y = L\b 0 1 + 4.0000i 1.0000 + 4.0000i 1 0 + 5.0000i 1.3529 + 5.4118i 2 1 + 4.0000i 1.0000 + 4.0000i 3 2 + 3.0000i 
2.0000 + 3.0000i 4 1 + 4.0000i 3.5882 + 4.3529i 5 1 + 4.0000i 4.1250 + 3.3202i 6 + 5.0000i 4.4640 + 3.8632i 7 2 + 3.0000i 2.0000 + 3.0000i 8 2 + 3.0000i 2.0000 + 3.0000i 9 1 + 4.0000i 1.0000 + 4.0000i 10 1 + 4.0000i 3.5882 + 4.3529i 11 + 5.0000i 0 + 5.0000i 12 1 + 4.0000i 5.1793 + 4.6604i 13 2 + 3.0000i 2.0000 + 3.0000i 14 1 + 4.0000i 1.0000 + 4.0000i 15 + 5.0000i 1.3529 + 5.4118i 16 1 + 4.0000i 4.0045 + 3.8950i 17 + 5.0000i 3.0338 + 4.6248i 18 1 + 4.0000i 5.4495 + 2.2703i 19 + 5.0000i 4.0980 + 3.7290i 20 + 5.0000i 4.2680 + 3.7739i 21 + 5.0000i 5.3514 + 2.9480i 22 1 + 4.0000i 4.4178 + 2.0476i 23 1 + 4.0000i 3.5615 + 2.8322i 24 + 5.0000i 4.7526 + 2.2605i */ SuperLU_DIST_5.3.0/SRC/dlook_ahead_update.c0000644013363400111340000002222213233431301017223 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /************************************************************************/ /*! @file * \brief Look-ahead update of the Schur complement. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 * Modified: September 18, 2017
 *   
 */

iukp = iukp0; /* point to the first block in index[] */
rukp = rukp0; /* point to the start of nzval[] */

#ifdef ISORT
while (j < nub && iperm_u[j] <= k0 + num_look_aheads)
#else
while (j < nub && perm_u[2 * j] <= k0 + num_look_aheads)
#endif
{
    double zero = 0.0;

#if 0 // Sherry: no need to search
    /* Caveat: There is a permutation perm_u involved for j  */
    /* Search along the row for the pointers {iukp, rukp} pointing to
     * block U(k,j).
     * j    -- current block in look-ahead window, initialized to 0 on entry
     * iukp -- point to the start of index[] metadata
     * rukp -- point to the start of nzval[] array
     * jb   -- block number of block U(k,j), update destination column
     */
    arrive_at_ublock(
		     j, &iukp, &rukp, &jb, &ljb, &nsupc,
         	     iukp0, rukp0, usub, perm_u, xsup, grid
		    );
#else
    jb = usub[iukp];
    ljb = LBj (jb, grid);     /* Local block number of U(k,j). */
    nsupc = SuperSize(jb);
    iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */
#endif

    j++;
    jj0++;
    jj = iukp;

    while (usub[jj] == klst) ++jj; /* Skip zero segments */

    ldu = klst - usub[jj++];
    ncols = 1;

    /* This loop computes ldu. */
    for (; jj < iukp + nsupc; ++jj) { /* for each column jj in block U(k,j) */
        segsize = klst - usub[jj];
        if (segsize) {
            ++ncols;
            if (segsize > ldu)  ldu = segsize;
        }
    }
#if ( DEBUGlevel>=3 )
    ++num_update;
#endif

#if ( DEBUGlevel>=3 )
    printf ("(%d) k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n",
	    iam, k, jb, ldu, ncols, nsupc);
    ++num_copy;
#endif

    /* Now copy one block U(k,j) to bigU for GEMM, padding zeros up to ldu. */
    tempu = bigU; /* Copy one block U(k,j) to bigU for GEMM */
    for (jj = iukp; jj < iukp + nsupc; ++jj) {
        segsize = klst - usub[jj];
        if (segsize) {
            lead_zero = ldu - segsize;
            for (i = 0; i < lead_zero; ++i) tempu[i] = zero;
            tempu += lead_zero;
            for (i = 0; i < segsize; ++i) {
                tempu[i] = uval[rukp + i];
            }
            rukp += segsize;
            tempu += segsize;
        }
    }
    tempu = bigU; /* set back to the beginning of the buffer */
#if 0
    rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */
#endif

    nbrow = lsub[1]; /* number of row subscripts in L(:,k) */
    if (myrow == krow) nbrow = lsub[1] - lsub[3]; /* skip diagonal block for those rows. */
    // double ttx =SuperLU_timer_();

    int current_b = 0; /* Each thread starts searching from first block.
                          This records the moving search target.           */
    lptr = lptr0; /* point to the start of index[] in supernode L(:,k) */
    luptr = luptr0;

#ifdef _OPENMP
    /* Sherry -- examine all the shared variables ??
       'firstprivate' ensures that the private variables are initialized
       to the values before entering the loop.  */
#pragma omp parallel for \
    firstprivate(lptr,luptr,ib,current_b) private(lb) \
    default(shared) schedule(dynamic)
#endif
    for (lb = 0; lb < nlb; lb++) { /* Loop through each block in L(:,k) */
        int temp_nbrow; /* automatic variable is private */

        /* Search for the L block that my thread will work on.
           No need to search from 0, can continue at the point where
           it is left from last iteration.
           Note: Blocks may not be sorted in L. Different thread picks up
	   different lb.   */
        for (; current_b < lb; ++current_b) {
            temp_nbrow = lsub[lptr + 1];    /* Number of full rows. */
            lptr += LB_DESCRIPTOR;  /* Skip descriptor. */
            lptr += temp_nbrow;   /* move to next block */
            luptr += temp_nbrow;  /* move to next block */
        }

#ifdef _OPENMP        
        int_t thread_id = omp_get_thread_num ();
#else
        int_t thread_id = 0;
#endif
        double * tempv = bigV + ldt*ldt*thread_id;

        int *indirect_thread  = indirect + ldt * thread_id;
        int *indirect2_thread = indirect2 + ldt * thread_id;        
        ib = lsub[lptr];        /* block number of L(i,k) */
        temp_nbrow = lsub[lptr + 1];    /* Number of full rows. */
	/* assert (temp_nbrow <= nbrow); */

        lptr += LB_DESCRIPTOR;  /* Skip descriptor. */

	/*if (thread_id == 0) tt_start = SuperLU_timer_();*/

        /* calling gemm */
	stat->ops[FACT] += 2.0 * (flops_t)temp_nbrow * ldu * ncols;
#if defined (USE_VENDOR_BLAS)
        dgemm_("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
                   &lusup[luptr + (knsupc - ldu) * nsupr], &nsupr,
                   tempu, &ldu, &beta, tempv, &temp_nbrow, 1, 1);
#else
        dgemm_("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
                   &lusup[luptr + (knsupc - ldu) * nsupr], &nsupr,
                   tempu, &ldu, &beta, tempv, &temp_nbrow );
#endif

#if 0
	if (thread_id == 0) {
	    tt_end = SuperLU_timer_();
	    LookAheadGEMMTimer += tt_end - tt_start;
	    tt_start = tt_end;
	} 
#endif
        /* Now scattering the output. */
        if (ib < jb) {    /* A(i,j) is in U. */
            dscatter_u (ib, jb,
                       nsupc, iukp, xsup,
                       klst, temp_nbrow,
                       lptr, temp_nbrow, lsub,
                       usub, tempv, Ufstnz_br_ptr, Unzval_br_ptr, grid);
        } else {          /* A(i,j) is in L. */
            dscatter_l (ib, ljb, nsupc, iukp, xsup, klst, temp_nbrow, lptr,
                       temp_nbrow, usub, lsub, tempv,
                       indirect_thread, indirect2_thread, 
                       Lrowind_bc_ptr, Lnzval_bc_ptr, grid);
        }

        ++current_b;         /* Move to next block. */
        lptr += temp_nbrow;
        luptr += temp_nbrow;

#if 0
	if (thread_id == 0) {
	    tt_end = SuperLU_timer_();
	    LookAheadScatterTimer += tt_end - tt_start;
	}
#endif
    } /* end parallel for lb = 0, nlb ... all blocks in L(:,k) */

#if 0
    rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */
#endif
    iukp += nsupc; /* Mov to block U(k,j+1) */

    /* =========================================== *
     * == factorize L(:,j) and send if possible == *
     * =========================================== */
    kk = jb; /* destination column that is just updated */
    kcol = PCOL (kk, grid);
#ifdef ISORT
    kk0 = iperm_u[j - 1];
#else
    kk0 = perm_u[2 * (j - 1)];
#endif
    look_id = kk0 % (1 + num_look_aheads);

    if (look_ahead[kk] == k0 && kcol == mycol) {
        /* current column is the last dependency */
        look_id = kk0 % (1 + num_look_aheads);

        /* Factor diagonal and subdiagonal blocks and test for exact
           singularity.  */
        factored[kk] = 0;

        double tt1 = SuperLU_timer_();

        PDGSTRF2(options, kk0, kk, thresh, Glu_persist, grid, Llu,
                  U_diag_blk_send_req, tag_ub, stat, info);

        pdgstrf2_timer += SuperLU_timer_() - tt1; 

        /* stat->time7 += SuperLU_timer_() - ttt1; */

        /* Multicasts numeric values of L(:,kk) to process rows. */
        send_req = send_reqs[look_id];
        msgcnt = msgcnts[look_id];

        lk = LBj (kk, grid);    /* Local block number. */
        lsub1 = Lrowind_bc_ptr[lk];
        lusup1 = Lnzval_bc_ptr[lk];
        if (lsub1) {
            msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0] * LB_DESCRIPTOR;
            msgcnt[1] = lsub1[1] * SuperSize (kk);
        } else {
            msgcnt[0] = 0;
            msgcnt[1] = 0;
        }

        scp = &grid->rscp;      /* The scope of process row. */
        for (pj = 0; pj < Pc; ++pj) {
            if (ToSendR[lk][pj] != EMPTY) {
#if ( PROFlevel>=1 )
                TIC (t1);
#endif
                MPI_Isend (lsub1, msgcnt[0], mpi_int_t, pj,
                           SLU_MPI_TAG (0, kk0) /* (4*kk0)%tag_ub */ ,
                           scp->comm, &send_req[pj]);
                MPI_Isend (lusup1, msgcnt[1], MPI_DOUBLE, pj,
                           SLU_MPI_TAG (1, kk0) /* (4*kk0+1)%tag_ub */ ,
                           scp->comm, &send_req[pj + Pc]);
#if ( PROFlevel>=1 )
                TOC (t2, t1);
                stat->utime[COMM] += t2;
                msg_cnt += 2;
                msg_vol += msgcnt[0] * iword + msgcnt[1] * dword;
#endif
#if ( DEBUGlevel>=2 )
                printf ("[%d] -2- Send L(:,%4d): #lsub %4d, #lusup %4d to Pj %2d, tags %d:%d \n",
                        iam, kk, msgcnt[0], msgcnt[1], pj,
			SLU_MPI_TAG(0,kk0), SLU_MPI_TAG(1,kk0));
#endif
            }  /* end if ( ToSendR[lk][pj] != EMPTY ) */
        } /* end for pj ... */
    } /* end if( look_ahead[kk] == k0 && kcol == mycol ) */
} /* end while j < nub and perm_u[j] 
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 *     October 15, 2008  use fewer MPI_Reduce
 * 
*/ #include "superlu_zdefs.h" #define ISEND_IRECV /* * Function prototypes */ #ifdef _CRAY fortran void CTRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*); fortran void CGEMM(_fcd, _fcd, int*, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*, doublecomplex*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif static void gather_diag_to_all(int_t, int_t, doublecomplex [], Glu_persist_t *, LocalLU_t *, gridinfo_t *, int_t, int_t [], int_t [], doublecomplex [], int_t, doublecomplex []); /*! \brief * *
 * Purpose
 * =======
 *
 * pzgstrs_Bglobal solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by pzgstrf.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pzgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_zdefs.h for the definition of 'LUstruct_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) doublecomplex*
 *        On entry, the right-hand side matrix of the possibly equilibrated
 *        and row permuted system.
 *        On exit, the solution matrix of the possibly equilibrated
 *        and row permuted system if info = 0;
 *
 *        NOTE: Currently, the N-by-NRHS  matrix B must reside on all 
 *              processes when calling this routine.
 *
 * ldb    (input) int (global)
 *        Leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pzgstrs_Bglobal(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, doublecomplex *B, int_t ldb, int nrhs, SuperLUStat_t *stat, int *info) { /* Overview: performs the forward solve L*y = b and then the back solve U*x = y on the process grid, and finally copies the solution X back into B on all processes (see the three sections marked below). */ Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; doublecomplex alpha = {1.0, 0.0}; doublecomplex zero = {0.0, 0.0}; doublecomplex *lsum; /* Local running sum of the updates to B-components */ doublecomplex *x; /* X component at step k. */ doublecomplex *lusup, *dest; doublecomplex *recvbuf, *tempv; doublecomplex *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */ Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */ int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */ int_t kcol, krow, mycol, myrow; int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int Pc, Pr, iam; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; doublecomplex **Lnzval_bc_ptr; MPI_Status status; #if defined (ISEND_IRECV) || defined (BSEND) MPI_Request *send_req, recv_req; #endif /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */ int_t nleaf = 0, nroot = 0; /*-- Counts used for U-solve --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist = Llu->bsendx_plist; int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */ int_t *brecv; /* Count of modifications to be recv'd from processes in this row. 
*/ int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. */ double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */ t = SuperLU_timer_(); /* Test input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -9; /* NOTE(review): nrhs is the 6th parameter of this routine, yet -9 is reported; the header states that info = -i flags the i-th argument -- confirm the intended code. */ if ( *info ) { pxerr_dist("PZGSTRS_BGLOBAL", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ stat->ops[SOLVE] = 0.0; Llu->SolveMsgSent = 0; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgstrs_Bglobal()"); #endif /* Work on a private copy of Llu->fmod[]: the counts are altered during the solve, and the originals must stay intact for a subsequent call to PZGSTRS_BGLOBAL. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; #if defined (ISEND_IRECV) || defined (BSEND) k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb; if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); #endif #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Obtain ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. 
*/ knsupc = sp_ienv_dist(3); maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H ); /* Receive buffer length: knsupc*nrhs entries plus the larger of the two header sizes; sp_ienv_dist(3) is presumably the maximum supernode size -- TODO confirm. */ if ( !(lsum = doublecomplexCalloc_dist(((size_t)ldalsum) * nrhs + nlb * LSUM_H)) ) ABORT("Calloc fails for lsum[]."); if ( !(x = doublecomplexMalloc_dist(((size_t)ldalsum) * nrhs + nlb * XK_H)) ) ABORT("Malloc fails for x[]."); if ( !(recvbuf = doublecomplexMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doublecomplexCalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. *---------------------------------------------------*/ /* * Copy B into X on the diagonal processes. */ ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H].r = k;/* Block number prepended in the header. */ lsum[il - LSUM_H].i = 0; kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* Diagonal process. */ jj = X_BLK( lk ); x[jj - XK_H].r = k; /* Block number prepended in the header. */ x[jj - XK_H].i = 0; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) /* X is stored in blocks. */ x[i + jj + j*knsupc] = B[i + ii + j*ldb]; } } ii += knsupc; } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); if ( mycol != kcol && fmod[lk] ) mod_bit[lk] = 1; /* contribution from off-diagonal */ } } /* Every process receives the count, but it is only useful on the diagonal processes. 
*/ MPI_Allreduce( mod_bit, frecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. 
*/ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* Diagonal process. */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && fmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) frecv[%4d] %2d\n", iam, k, frecv[lk]); assert( frecv[lk] < Pc ); #endif } } } #endif } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. --------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( frecv[lk]==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc - 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) /* NOTE(review): x[ii-XK_H] is a doublecomplex but is printed with %2.0f here and in the later "Sent X" debug prints; presumably the .r member was intended -- confirm. */ printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req,stat); } } /* if diagonal process ... */ } /* for k ... */ /* ----------------------------------------------------------- Compute the internal nodes asynchronously by all processes. ----------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. */ #if 1 MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); #else /* -MPI- FATAL: Remote protocol queue full */ MPI_Irecv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &request ); MPI_Wait( &request, &status ); #endif k = (*recvbuf).r; /* The block number travels in the real part of the first (header) entry. */ #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ zlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: /* Receiver must be a diagonal process */ --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) z_add(&x[i + ii + j*knsupc], &x[i + ii + j*knsupc], &tempv[i + j*knsupc]); if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc - 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications. */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( DEBUGlevel>=2 ) printf("\n(%d) .. After L-solve: y =\n", iam); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); } MPI_Barrier( grid->comm ); } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); Llu->SolveMsgSent = 0; #endif /*--------------------------------------------------- * Back solve Ux = y. * * The Y components from the forward solve is already * on the diagonal processes. 
*---------------------------------------------------*/ /* Work on a private copy of Llu->bmod[]: the counts are altered during the solve, and the originals must stay intact for a subsequent call to PZGSTRS_BGLOBAL. */ if ( !(bmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for bmod[]."); for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i]; if ( !(brecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for brecv[]."); Llu->brecv = brecv; /* * Compute brecv[] and nbrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) mod_bit[lk] = 1; /* Contribution from off-diagonal */ } } /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, brecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. 
*/ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #endif } /* Re-initialize lsum to zero. Each block header is already in place. */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { knsupc = SuperSize( k ); lk = LBi( k, grid ); il = LSUM_BLK( lk ); dest = &lsum[il]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = zero; } } /* Set up additional pointers for the index and value arrays of U. nub is the number of local block columns. */ nub = CEILING( nsupers, Pc ); /* Number of local block columns. */ if ( !(Urbs = (int_t *) intCalloc_dist(2*((size_t)nub))) ) ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero blocks in a block column. */ Urbs1 = Urbs + nub; /* Second half of the Urbs allocation; used below as the per-column fill cursor. */ if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) ) ABORT("Malloc fails for Ucb_indptr[]"); if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) ) ABORT("Malloc fails for Ucb_valptr[]"); /* Count number of row blocks in a block column. One pass of the skeleton graph of U. */ for (lk = 0; lk < nlb; ++lk) { usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ /* usub[0] -- number of column blocks in this block row. */ #if ( DEBUGlevel>=2 ) Ublocks += usub[0]; #endif i = BR_HEADER; /* Pointer in index array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number */ ++Urbs[LBj(k,grid)]; i += UB_DESCRIPTOR + SuperSize( k ); } } } /* Set up the vertical linked lists for the row blocks. One pass of the skeleton graph of U. */ for (lb = 0; lb < nub; ++lb) { if ( Urbs[lb] ) { /* Not an empty block column. 
*/ if ( !(Ucb_indptr[lb] = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) ) ABORT("Malloc fails for Ucb_indptr[lb][]"); if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) ) ABORT("Malloc fails for Ucb_valptr[lb][]"); } } for (lk = 0; lk < nlb; ++lk) { /* For each block row. */ usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ i = BR_HEADER; /* Pointer in index array. */ j = 0; /* Pointer in nzval array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number, column-wise. */ ljb = LBj( k, grid ); /* Local block number, column-wise. */ Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk; Ucb_indptr[ljb][Urbs1[ljb]].indpos = i; Ucb_valptr[ljb][Urbs1[ljb]] = j; ++Urbs1[ljb]; j += usub[i+1]; i += UB_DESCRIPTOR + SuperSize( k ); } } } #if ( DEBUGlevel>=2 ) for (p = 0; p < Pr*Pc; ++p) { if (iam == p) { printf("(%2d) .. Ublocks %d\n", iam, Ublocks); for (lb = 0; lb < nub; ++lb) { printf("(%2d) Local col %2d: # row blocks %2d\n", iam, lb, Urbs[lb]); if ( Urbs[lb] ) { for (i = 0; i < Urbs[lb]; ++i) printf("(%2d) .. row blk %2d:\ lbnum %d, indpos %d, valpos %d\n", iam, i, Ucb_indptr[lb][i].lbnum, Ucb_indptr[lb][i].indpos, Ucb_valptr[lb][i]); } } } MPI_Barrier( grid->comm ); } for (p = 0; p < Pr*Pc; ++p) { if ( iam == p ) { printf("\n(%d) bsendx_plist[][]", iam); for (lb = 0; lb < nub; ++lb) { printf("\n(%d) .. local col %2d: ", iam, lb); for (i = 0; i < Pr; ++i) printf("%4d", bsendx_plist[lb][i]); } printf("\n"); } MPI_Barrier( grid->comm ); } #endif /* DEBUGlevel */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Setup U-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif /* * Solve the roots first by all the diagonal processes. */ #if ( DEBUGlevel>=2 ) printf("(%2d) nroot %4d\n", iam, nroot); #endif for (k = nsupers-1; k >= 0 && nroot; --k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process. 
*/ knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number, row-wise. */ if ( brecv[lk]==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc + 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ --nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) zlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if root ... */ } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. */ while ( nbrecvx || nbrecvmod ) { /* While not finished. */ /* Receive a message. 
*/ MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = (*recvbuf).r; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nbrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ zlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); break; case LSUM: /* Receiver must be a diagonal process */ --nbrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) z_add(&x[i + ii + j*knsupc], &x[i + ii + j*knsupc], &tempv[i + j*knsupc]); if ( (--brecv[lk])==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc + 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii - XK_H], pi); #endif } } /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) zlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if becomes solvable */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. U-solve time\t%8.2f\n", t); #endif /* Copy the solution X into B (on all processes). */ { int_t num_diag_procs, *diag_procs, *diag_len; doublecomplex *work; get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX(jj, diag_len[j]); if ( !(work = doublecomplexMalloc_dist(((size_t)jj)*nrhs)) ) ABORT("Malloc fails for work[]"); gather_diag_to_all(n, nrhs, x, Glu_persist, Llu, grid, num_diag_procs, diag_procs, diag_len, B, ldb, work); SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); SUPERLU_FREE(work); } /* Deallocate storage. 
*/ SUPERLU_FREE(lsum); SUPERLU_FREE(x); SUPERLU_FREE(recvbuf); for (i = 0; i < nub; ++i) if ( Urbs[i] ) { SUPERLU_FREE(Ucb_indptr[i]); SUPERLU_FREE(Ucb_valptr[i]); } SUPERLU_FREE(Ucb_indptr); SUPERLU_FREE(Ucb_valptr); SUPERLU_FREE(Urbs); SUPERLU_FREE(bmod); SUPERLU_FREE(brecv); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); SUPERLU_FREE(send_req); #endif #ifdef BSEND SUPERLU_FREE(send_req); #endif /* NOTE(review): when PRNTlevel>=2 the U-solve timing block above overwrites t with an elapsed interval instead of a timestamp, so the difference below is not a duration in that configuration -- confirm. */ stat->utime[SOLVE] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgstrs_Bglobal()"); #endif } /* PZGSTRS_BGLOBAL */ /* * Gather the components of x vector on the diagonal processes * onto all processes, and combine them into the global vector y. * Each diagonal process p in turn packs its blocks k = p, p+num_diag_procs, ... * into work[], broadcasts work[] over grid->comm, and every process then * unpacks work[] into y (leading dimension ldy). */ static void gather_diag_to_all(int_t n, int_t nrhs, doublecomplex x[], Glu_persist_t *Glu_persist, LocalLU_t *Llu, gridinfo_t *grid, int_t num_diag_procs, int_t diag_procs[], int_t diag_len[], doublecomplex y[], int_t ldy, doublecomplex work[]) { int_t i, ii, j, k, lk, lwork, nsupers, p; int_t *ilsum, *xsup; int iam, knsupc, pkk; doublecomplex *x_col, *y_col; iam = grid->iam; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; ilsum = Llu->ilsum; for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { /* Copy x vector into a buffer. 
*/ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); /*ilsum[lk] + (lk+1)*XK_H;*/ x_col = &x[ii]; for (j = 0; j < nrhs; ++j) { for (i = 0; i < knsupc; ++i) work[i+lwork] = x_col[i]; lwork += knsupc; x_col += knsupc; } } MPI_Bcast( work, lwork, SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm ); } else { /* Receiving side: the count diag_len[p]*nrhs is expected to equal the sender's lwork. */ MPI_Bcast( work, diag_len[p]*nrhs, SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm ); } /* Scatter work[] into global y vector. 
*/ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); ii = FstBlockC( k ); y_col = &y[ii]; for (j = 0; j < nrhs; ++j) { for (i = 0; i < knsupc; ++i) y_col[i] = work[i+lwork]; lwork += knsupc; y_col += ldy; } } } } /* GATHER_DIAG_TO_ALL */ SuperLU_DIST_5.3.0/SRC/dlaqgs_dist.c0000644013363400111340000001002113233431301015717 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Equilibrates a general sparse M by N matrix A */ /* * File name: dlaqgs.c * History: Modified from LAPACK routine DLAQGE */ #include #include "superlu_ddefs.h" /*! \brief
    Purpose   
    =======   

    DLAQGS_dist equilibrates a general sparse M by N matrix A using the row
    and column scaling factors in the vectors R and C.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.

    Arguments   
    =========   

    A       (input/output) SuperMatrix*
            On exit, the equilibrated matrix.  See EQUED for the form of 
            the equilibrated matrix. The type of A can be:
	    Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
	    
    R       (input) double*, dimension (A->nrow)
            The row scale factors for A.
	    
    C       (input) double*, dimension (A->ncol)
            The column scale factors for A.
	    
    ROWCND  (input) double
            Ratio of the smallest R(i) to the largest R(i).
	    
    COLCND  (input) double
            Ratio of the smallest C(i) to the largest C(i).
	    
    AMAX    (input) double
            Absolute value of largest matrix entry.
	    
    EQUED   (output) char*
            Specifies the form of equilibration that was done.   
            = 'N':  No equilibration   
            = 'R':  Row equilibration, i.e., A has been premultiplied by  
                    diag(R).   
            = 'C':  Column equilibration, i.e., A has been postmultiplied  
                    by diag(C).   
            = 'B':  Both row and column equilibration, i.e., A has been
                    replaced by diag(R) * A * diag(C).   

    Internal Parameters   
    ===================   

    THRESH is a threshold value used to decide if row or column scaling   
    should be done based on the ratio of the row or column scaling   
    factors.  If ROWCND < THRESH, row scaling is done, and if   
    COLCND < THRESH, column scaling is done.   

    LARGE and SMALL are threshold values used to decide if row scaling   
    should be done based on the absolute size of the largest matrix   
    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   

    ===================================================================== 
*/ void dlaqgs_dist(SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) { #define THRESH (0.1) /* Local variables */ NCformat *Astore; double *Aval; int_t i, j, irow; double large, small, cj; /* Quick return if possible */ if (A->nrow <= 0 || A->ncol <= 0) { *(unsigned char *)equed = 'N'; return; } Astore = (NCformat *) A->Store; Aval = (double *) Astore->nzval; /* Initialize LARGE and SMALL. */ small = dmach_dist("Safe minimum") / dmach_dist("Precision"); large = 1. / small; if (rowcnd >= THRESH && amax >= small && amax <= large) { if (colcnd >= THRESH) *(unsigned char *)equed = 'N'; else { /* Column scaling */ for (j = 0; j < A->ncol; ++j) { cj = c[j]; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { Aval[i] *= cj; } } *(unsigned char *)equed = 'C'; } } else if (colcnd >= THRESH) { /* Row scaling, no column scaling */ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; Aval[i] *= r[irow]; } *(unsigned char *)equed = 'R'; } else { /* Row and column scaling */ for (j = 0; j < A->ncol; ++j) { cj = c[j]; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; Aval[i] *= cj * r[irow]; } } *(unsigned char *)equed = 'B'; } return; } /* dlaqgs_dist */ SuperLU_DIST_5.3.0/SRC/old_colamd.c0000644013363400111340000023415413233431301015535 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief An approximate minimum degree column ordering algorithm */ /* ========================================================================== */ /* === colamd - a sparse matrix column ordering algorithm =================== */ /* ========================================================================== */ /* colamd: An approximate minimum degree column ordering algorithm. Purpose: Colamd computes a permutation Q such that the Cholesky factorization of (AQ)'(AQ) has less fill-in and requires fewer floating point operations than A'A. This also provides a good ordering for sparse partial pivoting methods, P(AQ) = LU, where Q is computed prior to numerical factorization, and P is computed during numerical factorization via conventional partial pivoting with row interchanges. Colamd is the column ordering method used in SuperLU, part of the ScaLAPACK library. It is also available as user-contributed software for Matlab 5.2, available from MathWorks, Inc. (http://www.mathworks.com). This routine can be used in place of COLMMD in Matlab. By default, the \ and / operators in Matlab perform a column ordering (using COLMMD) prior to LU factorization using sparse partial pivoting, in the built-in Matlab LU(A) routine. Authors: The authors of the code itself are Stefan I. Larimore and Timothy A. Davis (davis@cise.ufl.edu), University of Florida. The algorithm was developed in collaboration with John Gilbert, Xerox PARC, and Esmond Ng, Oak Ridge National Laboratory. Date: August 3, 1998. Version 1.0. Acknowledgements: This work was supported by the National Science Foundation, under grants DMS-9504974 and DMS-9803599. Notice: Copyright (c) 1998 by the University of Florida. All Rights Reserved. THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. Permission is hereby granted to use or copy this program for any purpose, provided the above notices are retained on all copies. 
User documentation of any code that uses this code must cite the Authors, the Copyright, and "Used by permission." If this code is accessible from within Matlab, then typing "help colamd" or "colamd" (with no arguments) must cite the Authors. Permission to modify the code and to distribute modified code is granted, provided the above notices are retained, and a notice that the code was modified is included with the above copyright notice. You must also retain the Availability information below, of the original version. This software is provided free of charge. Availability: This file is located at http://www.cise.ufl.edu/~davis/colamd/colamd.c The colamd.h file is required, located in the same directory. The colamdmex.c file provides a Matlab interface for colamd. The symamdmex.c file provides a Matlab interface for symamd, which is a symmetric ordering based on this code, colamd.c. All codes are purely ANSI C compliant (they use no Unix-specific routines, include files, etc.). */ /* ========================================================================== */ /* === Description of user-callable routines ================================ */ /* ========================================================================== */ /* Each user-callable routine (declared as PUBLIC) is briefly described below. Refer to the comments preceding each routine for more details. ---------------------------------------------------------------------------- colamd_recommended: ---------------------------------------------------------------------------- Usage: Alen = colamd_recommended (nnz, n_row, n_col) ; Purpose: Returns recommended value of Alen for use by colamd. Returns -1 if any input argument is negative. Arguments: int nnz ; Number of nonzeros in the matrix A. This must be the same value as p [n_col] in the call to colamd - otherwise you will get a wrong value of the recommended memory to use. int n_row ; Number of rows in the matrix A. 
int n_col ; Number of columns in the matrix A. ---------------------------------------------------------------------------- colamd_set_defaults: ---------------------------------------------------------------------------- Usage: colamd_set_defaults (knobs) ; Purpose: Sets the default parameters. Arguments: double knobs [COLAMD_KNOBS] ; Output only. Rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) entries are removed prior to ordering. Columns with more than (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to ordering, and placed last in the output column ordering. Default values of these two knobs are both 0.5. Currently, only knobs [0] and knobs [1] are used, but future versions may use more knobs. If so, they will be properly set to their defaults by the future version of colamd_set_defaults, so that the code that calls colamd will not need to change, assuming that you either use colamd_set_defaults, or pass a (double *) NULL pointer as the knobs array to colamd. ---------------------------------------------------------------------------- colamd: ---------------------------------------------------------------------------- Usage: colamd (n_row, n_col, Alen, A, p, knobs) ; Purpose: Computes a column ordering (Q) of A such that P(AQ)=LU or (AQ)'AQ=LL' have less fill-in and require fewer floating point operations than factorizing the unpermuted matrix A or A'A, respectively. Arguments: int n_row ; Number of rows in the matrix A. Restriction: n_row >= 0. Colamd returns FALSE if n_row is negative. int n_col ; Number of columns in the matrix A. Restriction: n_col >= 0. Colamd returns FALSE if n_col is negative. int Alen ; Restriction (see note): Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col + COLAMD_STATS Colamd returns FALSE if these conditions are not met. Note: this restriction makes a modest assumption regarding the size of the two typedef'd structures, below. 
We do, however, guarantee that Alen >= colamd_recommended (nnz, n_row, n_col) will be sufficient. int A [Alen] ; Input argument, stats on output. A is an integer array of size Alen. Alen must be at least as large as the bare minimum value given above, but this is very low, and can result in excessive run time. For best performance, we recommend that Alen be greater than or equal to colamd_recommended (nnz, n_row, n_col), which adds nnz/5 to the bare minimum value given above. On input, the row indices of the entries in column c of the matrix are held in A [(p [c]) ... (p [c+1]-1)]. The row indices in a given column c need not be in ascending order, and duplicate row indices may be present. However, colamd will work a little faster if both of these conditions are met (Colamd puts the matrix into this format, if it finds that the conditions are not met). The matrix is 0-based. That is, rows are in the range 0 to n_row-1, and columns are in the range 0 to n_col-1. Colamd returns FALSE if any row index is out of range. The contents of A are modified during ordering, and are thus undefined on output with the exception of a few statistics about the ordering (A [0..COLAMD_STATS-1]): A [0]: number of dense or empty rows ignored. A [1]: number of dense or empty columns ignored (and ordered last in the output permutation p) A [2]: number of garbage collections performed. A [3]: 0, if all row indices in each column were in sorted order, and no duplicates were present. 1, otherwise (in which case colamd had to do more work) Note that a row can become "empty" if it contains only "dense" and/or "empty" columns, and similarly a column can become "empty" if it only contains "dense" and/or "empty" rows. Future versions may return more statistics in A, but the usage of these 4 entries in A will remain unchanged. int p [n_col+1] ; Both input and output argument. p is an integer array of size n_col+1. On input, it holds the "pointers" for the column form of the matrix A. 
Column c of the matrix A is held in A [(p [c]) ... (p [c+1]-1)]. The first entry, p [0], must be zero, and p [c] <= p [c+1] must hold for all c in the range 0 to n_col-1. The value p [n_col] is thus the total number of entries in the pattern of the matrix A. Colamd returns FALSE if these conditions are not met. On output, if colamd returns TRUE, the array p holds the column permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is the first column index in the new ordering, and p [n_col-1] is the last. That is, p [k] = j means that column j of A is the kth pivot column, in AQ, where k is in the range 0 to n_col-1 (p [0] = j means that column j of A is the first column in AQ). If colamd returns FALSE, then no permutation is returned, and p is undefined on output. double knobs [COLAMD_KNOBS] ; Input only. See colamd_set_defaults for a description. If the knobs array is not present (that is, if a (double *) NULL pointer is passed in its place), then the default values of the parameters are used instead. 
*/

/* ========================================================================== */
/* === Include files ======================================================== */
/* ========================================================================== */

/* limits.h:  the largest positive integer (INT_MAX) */
#include <limits.h>

/* stddef.h:  size_t, for overflow-checked size arithmetic */
#include <stddef.h>

/* colamd.h:  knob array size, stats output size, and global prototypes */
#include "colamd.h"

/* ========================================================================== */
/* === Scaffolding code definitions  ======================================== */
/* ========================================================================== */

/* Ensure that debugging is turned off: */
#ifndef NDEBUG
#define NDEBUG
#endif

/* assert.h:  the assert macro (no debugging if NDEBUG is defined) */
#include <assert.h>

/*
   Our "scaffolding code" philosophy:  In our opinion, well-written library
   code should keep its "debugging" code, and just normally have it turned off
   by the compiler so as not to interfere with performance.  This serves
   several purposes:

   (1) assertions act as comments to the reader, telling you what the code
	expects at that point.  All assertions will always be true (unless
	there really is a bug, of course).

   (2) leaving in the scaffolding code assists anyone who would like to modify
	the code, or understand the algorithm (by reading the debugging output,
	one can get a glimpse into what the code is doing).

   (3) (gasp!) for actually finding bugs.  This code has been heavily tested
	and "should" be fully functional and bug-free ... but you never know...

   To enable debugging, comment out the "#define NDEBUG" above.  The code will
   become outrageously slow when debugging is enabled.  To control the level of
   debugging output, set an environment variable D to 0 (little), 1 (some),
   2, 3, or 4 (lots).
*/ /* ========================================================================== */ /* === Row and Column structures ============================================ */ /* ========================================================================== */ typedef struct ColInfo_struct { int start ; /* index for A of first row in this column, or DEAD */ /* if column is dead */ int length ; /* number of rows in this column */ union { int thickness ; /* number of original columns represented by this */ /* col, if the column is alive */ int parent ; /* parent in parent tree super-column structure, if */ /* the column is dead */ } shared1 ; union { int score ; /* the score used to maintain heap, if col is alive */ int order ; /* pivot ordering of this column, if col is dead */ } shared2 ; union { int headhash ; /* head of a hash bucket, if col is at the head of */ /* a degree list */ int hash ; /* hash value, if col is not in a degree list */ int prev ; /* previous column in degree list, if col is in a */ /* degree list (but not at the head of a degree list) */ } shared3 ; union { int degree_next ; /* next column, if col is in a degree list */ int hash_next ; /* next column, if col is in a hash list */ } shared4 ; } ColInfo ; typedef struct RowInfo_struct { int start ; /* index for A of first col in this row */ int length ; /* number of principal columns in this row */ union { int degree ; /* number of principal & non-principal columns in row */ int p ; /* used as a row pointer in init_rows_cols () */ } shared1 ; union { int mark ; /* for computing set differences and marking dead rows*/ int first_column ;/* first column in row (used in garbage collection) */ } shared2 ; } RowInfo ; /* ========================================================================== */ /* === Definitions ========================================================== */ /* ========================================================================== */ #define MAX(a,b) (((a) > (b)) ? 
(a) : (b)) #define MIN(a,b) (((a) < (b)) ? (a) : (b)) #define ONES_COMPLEMENT(r) (-(r)-1) #define TRUE (1) #define FALSE (0) #define EMPTY (-1) /* Row and column status */ #define ALIVE (0) #define DEAD (-1) /* Column status */ #define DEAD_PRINCIPAL (-1) #define DEAD_NON_PRINCIPAL (-2) /* Macros for row and column status update and checking. */ #define ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark) #define ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE) #define ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE) #define COL_IS_DEAD(c) (Col [c].start < ALIVE) #define COL_IS_ALIVE(c) (Col [c].start >= ALIVE) #define COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL) #define KILL_ROW(r) { Row [r].shared2.mark = DEAD ; } #define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; } #define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; } /* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */ #define PUBLIC #define PRIVATE static /* ========================================================================== */ /* === Prototypes of PRIVATE routines ======================================= */ /* ========================================================================== */ PRIVATE int init_rows_cols ( int n_row, int n_col, RowInfo Row [], ColInfo Col [], int A [], int p [] ) ; PRIVATE void init_scoring ( int n_row, int n_col, RowInfo Row [], ColInfo Col [], int A [], int head [], double knobs [COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg ) ; PRIVATE int find_ordering ( int n_row, int n_col, int Alen, RowInfo Row [], ColInfo Col [], int A [], int head [], int n_col2, int max_deg, int pfree ) ; PRIVATE void order_children ( int n_col, ColInfo Col [], int p [] ) ; PRIVATE void detect_super_cols ( #ifndef NDEBUG int n_col, RowInfo Row [], #endif ColInfo Col [], int A [], int head [], int row_start, int row_length ) ; PRIVATE int garbage_collection ( int n_row, int n_col, RowInfo Row [], ColInfo Col [], 
int A [], int *pfree ) ; PRIVATE int clear_mark ( int n_row, RowInfo Row [] ) ; /* ========================================================================== */ /* === Debugging definitions ================================================ */ /* ========================================================================== */ #ifndef NDEBUG /* === With debugging ======================================================= */ /* stdlib.h: for getenv and atoi, to get debugging level from environment */ #include /* stdio.h: for printf (no printing if debugging is turned off) */ #include PRIVATE void debug_deg_lists ( int n_row, int n_col, RowInfo Row [], ColInfo Col [], int head [], int min_score, int should, int max_deg ) ; PRIVATE void debug_mark ( int n_row, RowInfo Row [], int tag_mark, int max_mark ) ; PRIVATE void debug_matrix ( int n_row, int n_col, RowInfo Row [], ColInfo Col [], int A [] ) ; PRIVATE void debug_structures ( int n_row, int n_col, RowInfo Row [], ColInfo Col [], int A [], int n_col2 ) ; /* the following is the *ONLY* global variable in this file, and is only */ /* present when debugging */ PRIVATE int debug_colamd ; /* debug print level */ #define DEBUG0(params) { (void) printf params ; } #define DEBUG1(params) { if (debug_colamd >= 1) (void) printf params ; } #define DEBUG2(params) { if (debug_colamd >= 2) (void) printf params ; } #define DEBUG3(params) { if (debug_colamd >= 3) (void) printf params ; } #define DEBUG4(params) { if (debug_colamd >= 4) (void) printf params ; } #else /* === No debugging ========================================================= */ #define DEBUG0(params) ; #define DEBUG1(params) ; #define DEBUG2(params) ; #define DEBUG3(params) ; #define DEBUG4(params) ; #endif /* ========================================================================== */ /* ========================================================================== */ /* === USER-CALLABLE ROUTINES: ============================================== */ /* 
========================================================================== */ /* ========================================================================== */ /* === colamd_recommended =================================================== */ /* ========================================================================== */ /* The colamd_recommended routine returns the suggested size for Alen. This value has been determined to provide good balance between the number of garbage collections and the memory requirements for colamd. */ PUBLIC int colamd_recommended /* returns recommended value of Alen. */ ( /* === Parameters ======================================================= */ int nnz, /* number of nonzeros in A */ int n_row, /* number of rows in A */ int n_col /* number of columns in A */ ) { /* === Local variables ================================================== */ int minimum ; /* bare minimum requirements */ int recommended ; /* recommended value of Alen */ if (nnz < 0 || n_row < 0 || n_col < 0) { /* return -1 if any input argument is corrupted */ DEBUG0 (("colamd_recommended error!")) ; DEBUG0 ((" nnz: %d, n_row: %d, n_col: %d\n", nnz, n_row, n_col)) ; return (-1) ; } minimum = 2 * (nnz) /* for A */ + (((n_col) + 1) * sizeof (ColInfo) / sizeof (int)) /* for Col */ + (((n_row) + 1) * sizeof (RowInfo) / sizeof (int)) /* for Row */ + n_col /* minimum elbow room to guarrantee success */ + COLAMD_STATS ; /* for output statistics */ /* recommended is equal to the minimum plus enough memory to keep the */ /* number garbage collections low */ recommended = minimum + nnz/5 ; return (recommended) ; } /* ========================================================================== */ /* === colamd_set_defaults ================================================== */ /* ========================================================================== */ /* The colamd_set_defaults routine sets the default values of the user- controllable parameters for colamd: knobs [0] rows with 
knobs[0]*n_col entries or more are removed prior to ordering. knobs [1] columns with knobs[1]*n_row entries or more are removed prior to ordering, and placed last in the column permutation. knobs [2..19] unused, but future versions might use this */ PUBLIC void colamd_set_defaults ( /* === Parameters ======================================================= */ double knobs [COLAMD_KNOBS] /* knob array */ ) { /* === Local variables ================================================== */ int i ; if (!knobs) { return ; /* no knobs to initialize */ } for (i = 0 ; i < COLAMD_KNOBS ; i++) { knobs [i] = 0 ; } knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ } /* ========================================================================== */ /* === colamd =============================================================== */ /* ========================================================================== */ /* The colamd routine computes a column ordering Q of a sparse matrix A such that the LU factorization P(AQ) = LU remains sparse, where P is selected via partial pivoting. The routine can also be viewed as providing a permutation Q such that the Cholesky factorization (AQ)'(AQ) = LL' remains sparse. On input, the nonzero patterns of the columns of A are stored in the array A, in order 0 to n_col-1. A is held in 0-based form (rows in the range 0 to n_row-1 and columns in the range 0 to n_col-1). Row indices for column c are located in A [(p [c]) ... (p [c+1]-1)], where p [0] = 0, and thus p [n_col] is the number of entries in A. The matrix is destroyed on output. The row indices within each column do not have to be sorted (from small to large row indices), and duplicate row indices may be present. However, colamd will work a little faster if columns are sorted and no duplicates are present. Matlab 5.2 always passes the matrix with sorted columns, and no duplicates. 
The integer array A is of size Alen. Alen must be at least of size (where nnz is the number of entries in A): nnz for the input column form of A + nnz for a row form of A that colamd generates + 6*(n_col+1) for a ColInfo Col [0..n_col] array (this assumes sizeof (ColInfo) is 6 int's). + 4*(n_row+1) for a RowInfo Row [0..n_row] array (this assumes sizeof (RowInfo) is 4 int's). + elbow_room must be at least n_col. We recommend at least nnz/5 in addition to that. If sufficient, changes in the elbow room affect the ordering time only, not the ordering itself. + COLAMD_STATS for the output statistics Colamd returns FALSE is memory is insufficient, or TRUE otherwise. On input, the caller must specify: n_row the number of rows of A n_col the number of columns of A Alen the size of the array A A [0 ... nnz-1] the row indices, where nnz = p [n_col] A [nnz ... Alen-1] (need not be initialized by the user) p [0 ... n_col] the column pointers, p [0] = 0, and p [n_col] is the number of entries in A. Column c of A is stored in A [p [c] ... p [c+1]-1]. knobs [0 ... 19] a set of parameters that control the behavior of colamd. If knobs is a NULL pointer the defaults are used. The user-callable colamd_set_defaults routine sets the default parameters. See that routine for a description of the user-controllable parameters. If the return value of Colamd is TRUE, then on output: p [0 ... n_col-1] the column permutation. p [0] is the first column index, and p [n_col-1] is the last. That is, p [k] = j means that column j of A is the kth column of AQ. A is undefined on output (the matrix pattern is destroyed), except for the following statistics: A [0] the number of dense (or empty) rows ignored A [1] the number of dense (or empty) columms. These are ordered last, in their natural order. A [2] the number of garbage collections performed. If this is excessive, then you would have gotten your results faster if Alen was larger. 
A [3] 0, if all row indices in each column were in sorted order and no duplicates were present. 1, if there were unsorted or duplicate row indices in the input. You would have gotten your results faster if A [3] was returned as 0. If the return value of Colamd is FALSE, then A and p are undefined on output. */ PUBLIC int colamd /* returns TRUE if successful */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows in A */ int n_col, /* number of columns in A */ int Alen, /* length of A */ int A [], /* row indices of A */ int p [], /* pointers to columns in A */ double knobs [COLAMD_KNOBS] /* parameters (uses defaults if NULL) */ ) { /* === Local variables ================================================== */ int i ; /* loop index */ int nnz ; /* nonzeros in A */ int Row_size ; /* size of Row [], in integers */ int Col_size ; /* size of Col [], in integers */ int elbow_room ; /* remaining free space */ RowInfo *Row ; /* pointer into A of Row [0..n_row] array */ ColInfo *Col ; /* pointer into A of Col [0..n_col] array */ int n_col2 ; /* number of non-dense, non-empty columns */ int n_row2 ; /* number of non-dense, non-empty rows */ int ngarbage ; /* number of garbage collections performed */ int max_deg ; /* maximum row degree */ double default_knobs [COLAMD_KNOBS] ; /* default knobs knobs array */ int init_result ; /* return code from initialization */ #ifndef NDEBUG debug_colamd = 0 ; /* no debug printing */ /* get "D" environment variable, which gives the debug printing level */ if (getenv ("D")) debug_colamd = atoi (getenv ("D")) ; DEBUG0 (("debug version, D = %d (THIS WILL BE SLOOOOW!)\n", debug_colamd)) ; #endif /* === Check the input arguments ======================================== */ if (n_row < 0 || n_col < 0 || !A || !p) { /* n_row and n_col must be non-negative, A and p must be present */ DEBUG0 (("colamd error! 
%d %d %d\n", n_row, n_col, Alen)) ; return (FALSE) ; } nnz = p [n_col] ; if (nnz < 0 || p [0] != 0) { /* nnz must be non-negative, and p [0] must be zero */ DEBUG0 (("colamd error! %d %d\n", nnz, p [0])) ; return (FALSE) ; } /* === If no knobs, set default parameters ============================== */ if (!knobs) { knobs = default_knobs ; colamd_set_defaults (knobs) ; } /* === Allocate the Row and Col arrays from array A ===================== */ Col_size = (n_col + 1) * sizeof (ColInfo) / sizeof (int) ; Row_size = (n_row + 1) * sizeof (RowInfo) / sizeof (int) ; elbow_room = Alen - (2*nnz + Col_size + Row_size) ; if (elbow_room < n_col + COLAMD_STATS) { /* not enough space in array A to perform the ordering */ DEBUG0 (("colamd error! elbow_room %d, %d\n", elbow_room,n_col)) ; return (FALSE) ; } Alen = 2*nnz + elbow_room ; Col = (ColInfo *) &A [Alen] ; Row = (RowInfo *) &A [Alen + Col_size] ; /* === Construct the row and column data structures ===================== */ init_result = init_rows_cols (n_row, n_col, Row, Col, A, p) ; if (init_result == -1) { /* input matrix is invalid */ DEBUG0 (("colamd error! 
matrix invalid\n")) ; return (FALSE) ; } /* === Initialize scores, kill dense rows/columns ======================= */ init_scoring (n_row, n_col, Row, Col, A, p, knobs, &n_row2, &n_col2, &max_deg) ; /* === Order the supercolumns =========================================== */ ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p, n_col2, max_deg, 2*nnz) ; /* === Order the non-principal columns ================================== */ order_children (n_col, Col, p) ; /* === Return statistics in A =========================================== */ for (i = 0 ; i < COLAMD_STATS ; i++) { A [i] = 0 ; } A [COLAMD_DENSE_ROW] = n_row - n_row2 ; A [COLAMD_DENSE_COL] = n_col - n_col2 ; A [COLAMD_DEFRAG_COUNT] = ngarbage ; A [COLAMD_JUMBLED_COLS] = init_result ; return (TRUE) ; } /* ========================================================================== */ /* === NON-USER-CALLABLE ROUTINES: ========================================== */ /* ========================================================================== */ /* There are no user-callable routines beyond this point in the file */ /* ========================================================================== */ /* === init_rows_cols ======================================================= */ /* ========================================================================== */ /* Takes the column form of the matrix in A and creates the row form of the matrix. Also, row and column attributes are stored in the Col and Row structs. If the columns are un-sorted or contain duplicate row indices, this routine will also sort and remove duplicate row indices from the column form of the matrix. Returns -1 on error, 1 if columns jumbled, or 0 if columns not jumbled. Not user-callable. 
*/ PRIVATE int init_rows_cols /* returns status code */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ RowInfo Row [], /* of size n_row+1 */ ColInfo Col [], /* of size n_col+1 */ int A [], /* row indices of A, of size Alen */ int p [] /* pointers to columns in A, of size n_col+1 */ ) { /* === Local variables ================================================== */ int col ; /* a column index */ int row ; /* a row index */ int *cp ; /* a column pointer */ int *cp_end ; /* a pointer to the end of a column */ int *rp ; /* a row pointer */ int *rp_end ; /* a pointer to the end of a row */ int last_start ; /* start index of previous column in A */ int start ; /* start index of column in A */ int last_row ; /* previous row */ int jumbled_columns ; /* indicates if columns are jumbled */ /* === Initialize columns, and check column pointers ==================== */ last_start = 0 ; for (col = 0 ; col < n_col ; col++) { start = p [col] ; if (start < last_start) { /* column pointers must be non-decreasing */ DEBUG0 (("colamd error! last p %d p [col] %d\n",last_start,start)); return (-1) ; } Col [col].start = start ; Col [col].length = p [col+1] - start ; Col [col].shared1.thickness = 1 ; Col [col].shared2.score = 0 ; Col [col].shared3.prev = EMPTY ; Col [col].shared4.degree_next = EMPTY ; last_start = start ; } /* must check the end pointer for last column */ if (p [n_col] < last_start) { /* column pointers must be non-decreasing */ DEBUG0 (("colamd error! 
last p %d p [n_col] %d\n",p[col],last_start)) ; return (-1) ; } /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ /* === Scan columns, compute row degrees, and check row indices ========= */ jumbled_columns = FALSE ; for (row = 0 ; row < n_row ; row++) { Row [row].length = 0 ; Row [row].shared2.mark = -1 ; } for (col = 0 ; col < n_col ; col++) { last_row = -1 ; cp = &A [p [col]] ; cp_end = &A [p [col+1]] ; while (cp < cp_end) { row = *cp++ ; /* make sure row indices within range */ if (row < 0 || row >= n_row) { DEBUG0 (("colamd error! col %d row %d last_row %d\n", col, row, last_row)) ; return (-1) ; } else if (row <= last_row) { /* row indices are not sorted or repeated, thus cols */ /* are jumbled */ jumbled_columns = TRUE ; } /* prevent repeated row from being counted */ if (Row [row].shared2.mark != col) { Row [row].length++ ; Row [row].shared2.mark = col ; last_row = row ; } else { /* this is a repeated entry in the column, */ /* it will be removed */ Col [col].length-- ; } } } /* === Compute row pointers ============================================= */ /* row form of the matrix starts directly after the column */ /* form of matrix in A */ Row [0].start = p [n_col] ; Row [0].shared1.p = Row [0].start ; Row [0].shared2.mark = -1 ; for (row = 1 ; row < n_row ; row++) { Row [row].start = Row [row-1].start + Row [row-1].length ; Row [row].shared1.p = Row [row].start ; Row [row].shared2.mark = -1 ; } /* === Create row form ================================================== */ if (jumbled_columns) { /* if cols jumbled, watch for repeated row indices */ for (col = 0 ; col < n_col ; col++) { cp = &A [p [col]] ; cp_end = &A [p [col+1]] ; while (cp < cp_end) { row = *cp++ ; if (Row [row].shared2.mark != col) { A [(Row [row].shared1.p)++] = col ; Row [row].shared2.mark = col ; } } } } else { /* if cols not jumbled, we don't need the mark (this is faster) */ for (col = 0 ; col < n_col ; col++) { cp = &A [p [col]] ; cp_end = &A [p [col+1]] ; while 
(cp < cp_end) { A [(Row [*cp++].shared1.p)++] = col ; } } } /* === Clear the row marks and set row degrees ========================== */ for (row = 0 ; row < n_row ; row++) { Row [row].shared2.mark = 0 ; Row [row].shared1.degree = Row [row].length ; } /* === See if we need to re-create columns ============================== */ if (jumbled_columns) { #ifndef NDEBUG /* make sure column lengths are correct */ for (col = 0 ; col < n_col ; col++) { p [col] = Col [col].length ; } for (row = 0 ; row < n_row ; row++) { rp = &A [Row [row].start] ; rp_end = rp + Row [row].length ; while (rp < rp_end) { p [*rp++]-- ; } } for (col = 0 ; col < n_col ; col++) { assert (p [col] == 0) ; } /* now p is all zero (different than when debugging is turned off) */ #endif /* === Compute col pointers ========================================= */ /* col form of the matrix starts at A [0]. */ /* Note, we may have a gap between the col form and the row */ /* form if there were duplicate entries, if so, it will be */ /* removed upon the first garbage collection */ Col [0].start = 0 ; p [0] = Col [0].start ; for (col = 1 ; col < n_col ; col++) { /* note that the lengths here are for pruned columns, i.e. 
*/ /* no duplicate row indices will exist for these columns */ Col [col].start = Col [col-1].start + Col [col-1].length ; p [col] = Col [col].start ; } /* === Re-create col form =========================================== */ for (row = 0 ; row < n_row ; row++) { rp = &A [Row [row].start] ; rp_end = rp + Row [row].length ; while (rp < rp_end) { A [(p [*rp++])++] = row ; } } return (1) ; } else { /* no columns jumbled (this is faster) */ return (0) ; } } /* ========================================================================== */ /* === init_scoring ========================================================= */ /* ========================================================================== */ /* Kills dense or empty columns and rows, calculates an initial score for each column, and places all columns in the degree lists. Not user-callable. */ PRIVATE void init_scoring ( /* === Parameters ======================================================= */ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ RowInfo Row [], /* of size n_row+1 */ ColInfo Col [], /* of size n_col+1 */ int A [], /* column form and row form of A */ int head [], /* of size n_col+1 */ double knobs [COLAMD_KNOBS],/* parameters */ int *p_n_row2, /* number of non-dense, non-empty rows */ int *p_n_col2, /* number of non-dense, non-empty columns */ int *p_max_deg /* maximum row degree */ ) { /* === Local variables ================================================== */ int c ; /* a column index */ int r, row ; /* a row index */ int *cp ; /* a column pointer */ int deg ; /* degree (# entries) of a row or column */ int *cp_end ; /* a pointer to the end of a column */ int *new_cp ; /* new column pointer */ int col_length ; /* length of pruned column */ int score ; /* current column score */ int n_col2 ; /* number of non-dense, non-empty columns */ int n_row2 ; /* number of non-dense, non-empty rows */ int dense_row_count ; /* remove rows with more entries than this */ int 
dense_col_count ; /* remove cols with more entries than this */ int min_score ; /* smallest column score */ int max_deg ; /* maximum row degree */ int next_col ; /* Used to add to degree list.*/ #ifndef NDEBUG int debug_count ; /* debug only. */ #endif /* === Extract knobs ==================================================== */ dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ; dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ; DEBUG0 (("densecount: %d %d\n", dense_row_count, dense_col_count)) ; max_deg = 0 ; n_col2 = n_col ; n_row2 = n_row ; /* === Kill empty columns =============================================== */ /* Put the empty columns at the end in their natural, so that LU */ /* factorization can proceed as far as possible. */ for (c = n_col-1 ; c >= 0 ; c--) { deg = Col [c].length ; if (deg == 0) { /* this is a empty column, kill and order it last */ Col [c].shared2.order = --n_col2 ; KILL_PRINCIPAL_COL (c) ; } } DEBUG0 (("null columns killed: %d\n", n_col - n_col2)) ; /* === Kill dense columns =============================================== */ /* Put the dense columns at the end, in their natural order */ for (c = n_col-1 ; c >= 0 ; c--) { /* skip any dead columns */ if (COL_IS_DEAD (c)) { continue ; } deg = Col [c].length ; if (deg > dense_col_count) { /* this is a dense column, kill and order it last */ Col [c].shared2.order = --n_col2 ; /* decrement the row degrees */ cp = &A [Col [c].start] ; cp_end = cp + Col [c].length ; while (cp < cp_end) { Row [*cp++].shared1.degree-- ; } KILL_PRINCIPAL_COL (c) ; } } DEBUG0 (("Dense and null columns killed: %d\n", n_col - n_col2)) ; /* === Kill dense and empty rows ======================================== */ for (r = 0 ; r < n_row ; r++) { deg = Row [r].shared1.degree ; assert (deg >= 0 && deg <= n_col) ; if (deg > dense_row_count || deg == 0) { /* kill a dense or empty row */ KILL_ROW (r) ; --n_row2 ; } else { /* keep track of max degree of remaining rows */ 
max_deg = MAX (max_deg, deg) ; } } DEBUG0 (("Dense and null rows killed: %d\n", n_row - n_row2)) ; /* === Compute initial column scores ==================================== */ /* At this point the row degrees are accurate. They reflect the number */ /* of "live" (non-dense) columns in each row. No empty rows exist. */ /* Some "live" columns may contain only dead rows, however. These are */ /* pruned in the code below. */ /* now find the initial matlab score for each column */ for (c = n_col-1 ; c >= 0 ; c--) { /* skip dead column */ if (COL_IS_DEAD (c)) { continue ; } score = 0 ; cp = &A [Col [c].start] ; new_cp = cp ; cp_end = cp + Col [c].length ; while (cp < cp_end) { /* get a row */ row = *cp++ ; /* skip if dead */ if (ROW_IS_DEAD (row)) { continue ; } /* compact the column */ *new_cp++ = row ; /* add row's external degree */ score += Row [row].shared1.degree - 1 ; /* guard against integer overflow */ score = MIN (score, n_col) ; } /* determine pruned column length */ col_length = (int) (new_cp - &A [Col [c].start]) ; if (col_length == 0) { /* a newly-made null column (all rows in this col are "dense" */ /* and have already been killed) */ DEBUG0 (("Newly null killed: %d\n", c)) ; Col [c].shared2.order = --n_col2 ; KILL_PRINCIPAL_COL (c) ; } else { /* set column length and set score */ assert (score >= 0) ; assert (score <= n_col) ; Col [c].length = col_length ; Col [c].shared2.score = score ; } } DEBUG0 (("Dense, null, and newly-null columns killed: %d\n",n_col-n_col2)) ; /* At this point, all empty rows and columns are dead. All live columns */ /* are "clean" (containing no dead rows) and simplicial (no supercolumns */ /* yet). Rows may contain dead columns, but all live rows contain at */ /* least one live column. 
*/ #ifndef NDEBUG debug_structures (n_row, n_col, Row, Col, A, n_col2) ; #endif /* === Initialize degree lists ========================================== */ #ifndef NDEBUG debug_count = 0 ; #endif /* clear the hash buckets */ for (c = 0 ; c <= n_col ; c++) { head [c] = EMPTY ; } min_score = n_col ; /* place in reverse order, so low column indices are at the front */ /* of the lists. This is to encourage natural tie-breaking */ for (c = n_col-1 ; c >= 0 ; c--) { /* only add principal columns to degree lists */ if (COL_IS_ALIVE (c)) { DEBUG4 (("place %d score %d minscore %d ncol %d\n", c, Col [c].shared2.score, min_score, n_col)) ; /* === Add columns score to DList =============================== */ score = Col [c].shared2.score ; assert (min_score >= 0) ; assert (min_score <= n_col) ; assert (score >= 0) ; assert (score <= n_col) ; assert (head [score] >= EMPTY) ; /* now add this column to dList at proper score location */ next_col = head [score] ; Col [c].shared3.prev = EMPTY ; Col [c].shared4.degree_next = next_col ; /* if there already was a column with the same score, set its */ /* previous pointer to this new column */ if (next_col != EMPTY) { Col [next_col].shared3.prev = c ; } head [score] = c ; /* see if this score is less than current min */ min_score = MIN (min_score, score) ; #ifndef NDEBUG debug_count++ ; #endif } } #ifndef NDEBUG DEBUG0 (("Live cols %d out of %d, non-princ: %d\n", debug_count, n_col, n_col-debug_count)) ; assert (debug_count == n_col2) ; debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ; #endif /* === Return number of remaining columns, and max row degree =========== */ *p_n_col2 = n_col2 ; *p_n_row2 = n_row2 ; *p_max_deg = max_deg ; } /* ========================================================================== */ /* === find_ordering ======================================================== */ /* ========================================================================== */ /* Order the principal columns of the 
supercolumn form of the matrix (no supercolumns on input). Uses a minimum approximate column minimum degree ordering method. Not user-callable. */ PRIVATE int find_ordering /* return the number of garbage collections */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ int Alen, /* size of A, 2*nnz + elbow_room or larger */ RowInfo Row [], /* of size n_row+1 */ ColInfo Col [], /* of size n_col+1 */ int A [], /* column form and row form of A */ int head [], /* of size n_col+1 */ int n_col2, /* Remaining columns to order */ int max_deg, /* Maximum row degree */ int pfree /* index of first free slot (2*nnz on entry) */ ) { /* === Local variables ================================================== */ int k ; /* current pivot ordering step */ int pivot_col ; /* current pivot column */ int *cp ; /* a column pointer */ int *rp ; /* a row pointer */ int pivot_row ; /* current pivot row */ int *new_cp ; /* modified column pointer */ int *new_rp ; /* modified row pointer */ int pivot_row_start ; /* pointer to start of pivot row */ int pivot_row_degree ; /* # of columns in pivot row */ int pivot_row_length ; /* # of supercolumns in pivot row */ int pivot_col_score ; /* score of pivot column */ int needed_memory ; /* free space needed for pivot row */ int *cp_end ; /* pointer to the end of a column */ int *rp_end ; /* pointer to the end of a row */ int row ; /* a row index */ int col ; /* a column index */ int max_score ; /* maximum possible score */ int cur_score ; /* score of current column */ unsigned int hash ; /* hash value for supernode detection */ int head_column ; /* head of hash bucket */ int first_col ; /* first column in hash bucket */ int tag_mark ; /* marker value for mark array */ int row_mark ; /* Row [row].shared2.mark */ int set_difference ; /* set difference size of row with pivot row */ int min_score ; /* smallest column score */ int col_thickness ; /* 
"thickness" (# of columns in a supercol) */ int max_mark ; /* maximum value of tag_mark */ int pivot_col_thickness ; /* number of columns represented by pivot col */ int prev_col ; /* Used by Dlist operations. */ int next_col ; /* Used by Dlist operations. */ int ngarbage ; /* number of garbage collections performed */ #ifndef NDEBUG int debug_d ; /* debug loop counter */ int debug_step = 0 ; /* debug loop counter */ #endif /* === Initialization and clear mark ==================================== */ max_mark = INT_MAX - n_col ; /* INT_MAX defined in */ tag_mark = clear_mark (n_row, Row) ; min_score = 0 ; ngarbage = 0 ; DEBUG0 (("Ordering.. n_col2=%d\n", n_col2)) ; /* === Order the columns ================================================ */ for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */) { #ifndef NDEBUG if (debug_step % 100 == 0) { DEBUG0 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ; } else { DEBUG1 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ; } debug_step++ ; debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2-k, max_deg) ; debug_matrix (n_row, n_col, Row, Col, A) ; #endif /* === Select pivot column, and order it ============================ */ /* make sure degree list isn't empty */ assert (min_score >= 0) ; assert (min_score <= n_col) ; assert (head [min_score] >= EMPTY) ; #ifndef NDEBUG for (debug_d = 0 ; debug_d < min_score ; debug_d++) { assert (head [debug_d] == EMPTY) ; } #endif /* get pivot column from head of minimum degree list */ while (head [min_score] == EMPTY && min_score < n_col) { min_score++ ; } pivot_col = head [min_score] ; assert (pivot_col >= 0 && pivot_col <= n_col) ; next_col = Col [pivot_col].shared4.degree_next ; head [min_score] = next_col ; if (next_col != EMPTY) { Col [next_col].shared3.prev = EMPTY ; } assert (COL_IS_ALIVE (pivot_col)) ; DEBUG3 (("Pivot col: %d\n", pivot_col)) ; /* remember score for defrag check */ pivot_col_score = Col [pivot_col].shared2.score ; /* the pivot column 
is the kth column in the pivot order */ Col [pivot_col].shared2.order = k ; /* increment order count by column thickness */ pivot_col_thickness = Col [pivot_col].shared1.thickness ; k += pivot_col_thickness ; assert (pivot_col_thickness > 0) ; /* === Garbage_collection, if necessary ============================= */ needed_memory = MIN (pivot_col_score, n_col - k) ; if (pfree + needed_memory >= Alen) { pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; ngarbage++ ; /* after garbage collection we will have enough */ assert (pfree + needed_memory < Alen) ; /* garbage collection has wiped out the Row[].shared2.mark array */ tag_mark = clear_mark (n_row, Row) ; #ifndef NDEBUG debug_matrix (n_row, n_col, Row, Col, A) ; #endif } /* === Compute pivot row pattern ==================================== */ /* get starting location for this new merged row */ pivot_row_start = pfree ; /* initialize new row counts to zero */ pivot_row_degree = 0 ; /* tag pivot column as having been visited so it isn't included */ /* in merged pivot row */ Col [pivot_col].shared1.thickness = -pivot_col_thickness ; /* pivot row is the union of all rows in the pivot column pattern */ cp = &A [Col [pivot_col].start] ; cp_end = cp + Col [pivot_col].length ; while (cp < cp_end) { /* get a row */ row = *cp++ ; DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ; /* skip if row is dead */ if (ROW_IS_DEAD (row)) { continue ; } rp = &A [Row [row].start] ; rp_end = rp + Row [row].length ; while (rp < rp_end) { /* get a column */ col = *rp++ ; /* add the column, if alive and untagged */ col_thickness = Col [col].shared1.thickness ; if (col_thickness > 0 && COL_IS_ALIVE (col)) { /* tag column in pivot row */ Col [col].shared1.thickness = -col_thickness ; assert (pfree < Alen) ; /* place column in pivot row */ A [pfree++] = col ; pivot_row_degree += col_thickness ; } } } /* clear tag on pivot column */ Col [pivot_col].shared1.thickness = pivot_col_thickness ; max_deg = MAX 
(max_deg, pivot_row_degree) ; #ifndef NDEBUG DEBUG3 (("check2\n")) ; debug_mark (n_row, Row, tag_mark, max_mark) ; #endif /* === Kill all rows used to construct pivot row ==================== */ /* also kill pivot row, temporarily */ cp = &A [Col [pivot_col].start] ; cp_end = cp + Col [pivot_col].length ; while (cp < cp_end) { /* may be killing an already dead row */ row = *cp++ ; DEBUG2 (("Kill row in pivot col: %d\n", row)) ; KILL_ROW (row) ; } /* === Select a row index to use as the new pivot row =============== */ pivot_row_length = pfree - pivot_row_start ; if (pivot_row_length > 0) { /* pick the "pivot" row arbitrarily (first row in col) */ pivot_row = A [Col [pivot_col].start] ; DEBUG2 (("Pivotal row is %d\n", pivot_row)) ; } else { /* there is no pivot row, since it is of zero length */ pivot_row = EMPTY ; assert (pivot_row_length == 0) ; } assert (Col [pivot_col].length > 0 || pivot_row_length == 0) ; /* === Approximate degree computation =============================== */ /* Here begins the computation of the approximate degree. The column */ /* score is the sum of the pivot row "length", plus the size of the */ /* set differences of each row in the column minus the pattern of the */ /* pivot row itself. The column ("thickness") itself is also */ /* excluded from the column score (we thus use an approximate */ /* external degree). */ /* The time taken by the following code (compute set differences, and */ /* add them up) is proportional to the size of the data structure */ /* being scanned - that is, the sum of the sizes of each column in */ /* the pivot row. Thus, the amortized time to compute a column score */ /* is proportional to the size of that column (where size, in this */ /* context, is the column "length", or the number of row indices */ /* in that column). The number of row indices in a column is */ /* monotonically non-decreasing, from the length of the original */ /* column on input to colamd. 
*/ /* === Compute set differences ====================================== */ DEBUG1 (("** Computing set differences phase. **\n")) ; /* pivot row is currently dead - it will be revived later. */ DEBUG2 (("Pivot row: ")) ; /* for each column in pivot row */ rp = &A [pivot_row_start] ; rp_end = rp + pivot_row_length ; while (rp < rp_end) { col = *rp++ ; assert (COL_IS_ALIVE (col) && col != pivot_col) ; DEBUG2 (("Col: %d\n", col)) ; /* clear tags used to construct pivot row pattern */ col_thickness = -Col [col].shared1.thickness ; assert (col_thickness > 0) ; Col [col].shared1.thickness = col_thickness ; /* === Remove column from degree list =========================== */ cur_score = Col [col].shared2.score ; prev_col = Col [col].shared3.prev ; next_col = Col [col].shared4.degree_next ; assert (cur_score >= 0) ; assert (cur_score <= n_col) ; assert (cur_score >= EMPTY) ; if (prev_col == EMPTY) { head [cur_score] = next_col ; } else { Col [prev_col].shared4.degree_next = next_col ; } if (next_col != EMPTY) { Col [next_col].shared3.prev = prev_col ; } /* === Scan the column ========================================== */ cp = &A [Col [col].start] ; cp_end = cp + Col [col].length ; while (cp < cp_end) { /* get a row */ row = *cp++ ; row_mark = Row [row].shared2.mark ; /* skip if dead */ if (ROW_IS_MARKED_DEAD (row_mark)) { continue ; } assert (row != pivot_row) ; set_difference = row_mark - tag_mark ; /* check if the row has been seen yet */ if (set_difference < 0) { assert (Row [row].shared1.degree <= max_deg) ; set_difference = Row [row].shared1.degree ; } /* subtract column thickness from this row's set difference */ set_difference -= col_thickness ; assert (set_difference >= 0) ; /* absorb this row if the set difference becomes zero */ if (set_difference == 0) { DEBUG1 (("aggressive absorption. 
Row: %d\n", row)) ; KILL_ROW (row) ; } else { /* save the new mark */ Row [row].shared2.mark = set_difference + tag_mark ; } } } #ifndef NDEBUG debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2-k-pivot_row_degree, max_deg) ; #endif /* === Add up set differences for each column ======================= */ DEBUG1 (("** Adding set differences phase. **\n")) ; /* for each column in pivot row */ rp = &A [pivot_row_start] ; rp_end = rp + pivot_row_length ; while (rp < rp_end) { /* get a column */ col = *rp++ ; assert (COL_IS_ALIVE (col) && col != pivot_col) ; hash = 0 ; cur_score = 0 ; cp = &A [Col [col].start] ; /* compact the column */ new_cp = cp ; cp_end = cp + Col [col].length ; DEBUG2 (("Adding set diffs for Col: %d.\n", col)) ; while (cp < cp_end) { /* get a row */ row = *cp++ ; assert(row >= 0 && row < n_row) ; row_mark = Row [row].shared2.mark ; /* skip if dead */ if (ROW_IS_MARKED_DEAD (row_mark)) { continue ; } assert (row_mark > tag_mark) ; /* compact the column */ *new_cp++ = row ; /* compute hash function */ hash += row ; /* add set difference */ cur_score += row_mark - tag_mark ; /* integer overflow... */ cur_score = MIN (cur_score, n_col) ; } /* recompute the column's length */ Col [col].length = (int) (new_cp - &A [Col [col].start]) ; /* === Further mass elimination ================================= */ if (Col [col].length == 0) { DEBUG1 (("further mass elimination. 
Col: %d\n", col)) ; /* nothing left but the pivot row in this column */ KILL_PRINCIPAL_COL (col) ; pivot_row_degree -= Col [col].shared1.thickness ; assert (pivot_row_degree >= 0) ; /* order it */ Col [col].shared2.order = k ; /* increment order count by column thickness */ k += Col [col].shared1.thickness ; } else { /* === Prepare for supercolumn detection ==================== */ DEBUG2 (("Preparing supercol detection for Col: %d.\n", col)) ; /* save score so far */ Col [col].shared2.score = cur_score ; /* add column to hash table, for supercolumn detection */ hash %= n_col + 1 ; DEBUG2 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; assert (hash <= n_col) ; head_column = head [hash] ; if (head_column > EMPTY) { /* degree list "hash" is non-empty, use prev (shared3) of */ /* first column in degree list as head of hash bucket */ first_col = Col [head_column].shared3.headhash ; Col [head_column].shared3.headhash = col ; } else { /* degree list "hash" is empty, use head as hash bucket */ first_col = - (head_column + 2) ; head [hash] = - (col + 2) ; } Col [col].shared4.hash_next = first_col ; /* save hash function in Col [col].shared3.hash */ Col [col].shared3.hash = (int) hash ; assert (COL_IS_ALIVE (col)) ; } } /* The approximate external column degree is now computed. */ /* === Supercolumn detection ======================================== */ DEBUG1 (("** Supercolumn detection phase. 
**\n")) ; detect_super_cols ( #ifndef NDEBUG n_col, Row, #endif Col, A, head, pivot_row_start, pivot_row_length) ; /* === Kill the pivotal column ====================================== */ KILL_PRINCIPAL_COL (pivot_col) ; /* === Clear mark =================================================== */ tag_mark += (max_deg + 1) ; if (tag_mark >= max_mark) { DEBUG1 (("clearing tag_mark\n")) ; tag_mark = clear_mark (n_row, Row) ; } #ifndef NDEBUG DEBUG3 (("check3\n")) ; debug_mark (n_row, Row, tag_mark, max_mark) ; #endif /* === Finalize the new pivot row, and column scores ================ */ DEBUG1 (("** Finalize scores phase. **\n")) ; /* for each column in pivot row */ rp = &A [pivot_row_start] ; /* compact the pivot row */ new_rp = rp ; rp_end = rp + pivot_row_length ; while (rp < rp_end) { col = *rp++ ; /* skip dead columns */ if (COL_IS_DEAD (col)) { continue ; } *new_rp++ = col ; /* add new pivot row to column */ A [Col [col].start + (Col [col].length++)] = pivot_row ; /* retrieve score so far and add on pivot row's degree. */ /* (we wait until here for this in case the pivot */ /* row's degree was reduced due to mass elimination). 
*/ cur_score = Col [col].shared2.score + pivot_row_degree ; /* calculate the max possible score as the number of */ /* external columns minus the 'k' value minus the */ /* columns thickness */ max_score = n_col - k - Col [col].shared1.thickness ; /* make the score the external degree of the union-of-rows */ cur_score -= Col [col].shared1.thickness ; /* make sure score is less or equal than the max score */ cur_score = MIN (cur_score, max_score) ; assert (cur_score >= 0) ; /* store updated score */ Col [col].shared2.score = cur_score ; /* === Place column back in degree list ========================= */ assert (min_score >= 0) ; assert (min_score <= n_col) ; assert (cur_score >= 0) ; assert (cur_score <= n_col) ; assert (head [cur_score] >= EMPTY) ; next_col = head [cur_score] ; Col [col].shared4.degree_next = next_col ; Col [col].shared3.prev = EMPTY ; if (next_col != EMPTY) { Col [next_col].shared3.prev = col ; } head [cur_score] = col ; /* see if this score is less than current min */ min_score = MIN (min_score, cur_score) ; } #ifndef NDEBUG debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2-k, max_deg) ; #endif /* === Resurrect the new pivot row ================================== */ if (pivot_row_degree > 0) { /* update pivot row length to reflect any cols that were killed */ /* during super-col detection and mass elimination */ Row [pivot_row].start = pivot_row_start ; Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ; Row [pivot_row].shared1.degree = pivot_row_degree ; Row [pivot_row].shared2.mark = 0 ; /* pivot row is no longer dead */ } } /* === All principal columns have now been ordered ====================== */ return (ngarbage) ; } /* ========================================================================== */ /* === order_children ======================================================= */ /* ========================================================================== */ /* The find_ordering routine has ordered all of the 
principal columns (the representatives of the supercolumns). The non-principal columns have not yet been ordered. This routine orders those columns by walking up the parent tree (a column is a child of the column which absorbed it). The final permutation vector is then placed in p [0 ... n_col-1], with p [0] being the first column, and p [n_col-1] being the last. It doesn't look like it at first glance, but be assured that this routine takes time linear in the number of columns. Although not immediately obvious, the time taken by this routine is O (n_col), that is, linear in the number of columns. Not user-callable. */ PRIVATE void order_children ( /* === Parameters ======================================================= */ int n_col, /* number of columns of A */ ColInfo Col [], /* of size n_col+1 */ int p [] /* p [0 ... n_col-1] is the column permutation*/ ) { /* === Local variables ================================================== */ int i ; /* loop counter for all columns */ int c ; /* column index */ int parent ; /* index of column's parent */ int order ; /* column's order */ /* === Order each non-principal column ================================== */ for (i = 0 ; i < n_col ; i++) { /* find an un-ordered non-principal column */ assert (COL_IS_DEAD (i)) ; if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY) { parent = i ; /* once found, find its principal parent */ do { parent = Col [parent].shared1.parent ; } while (!COL_IS_DEAD_PRINCIPAL (parent)) ; /* now, order all un-ordered non-principal columns along path */ /* to this parent. collapse tree at the same time */ c = i ; /* get order of parent */ order = Col [parent].shared2.order ; do { assert (Col [c].shared2.order == EMPTY) ; /* order this column */ Col [c].shared2.order = order++ ; /* collaps tree */ Col [c].shared1.parent = parent ; /* get immediate parent of this column */ c = Col [c].shared1.parent ; /* continue until we hit an ordered column. 
There are */ /* guarranteed not to be anymore unordered columns */ /* above an ordered column */ } while (Col [c].shared2.order == EMPTY) ; /* re-order the super_col parent to largest order for this group */ Col [parent].shared2.order = order ; } } /* === Generate the permutation ========================================= */ for (c = 0 ; c < n_col ; c++) { p [Col [c].shared2.order] = c ; } } /* ========================================================================== */ /* === detect_super_cols ==================================================== */ /* ========================================================================== */ /* Detects supercolumns by finding matches between columns in the hash buckets. Check amongst columns in the set A [row_start ... row_start + row_length-1]. The columns under consideration are currently *not* in the degree lists, and have already been placed in the hash buckets. The hash bucket for columns whose hash function is equal to h is stored as follows: if head [h] is >= 0, then head [h] contains a degree list, so: head [h] is the first column in degree bucket h. Col [head [h]].headhash gives the first column in hash bucket h. otherwise, the degree list is empty, and: -(head [h] + 2) is the first column in hash bucket h. For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous column" pointer. Col [c].shared3.hash is used instead as the hash number for that column. The value of Col [c].shared4.hash_next is the next column in the same hash bucket. Assuming no, or "few" hash collisions, the time taken by this routine is linear in the sum of the sizes (lengths) of each column whose score has just been computed in the approximate degree computation. Not user-callable. 
*/

PRIVATE void detect_super_cols
(
    /* === Parameters ======================================================= */

#ifndef NDEBUG
    /* these two parameters are only needed when debugging is enabled: */
    int n_col,			/* number of columns of A */
    RowInfo Row [],		/* of size n_row+1 */
#endif
    ColInfo Col [],		/* of size n_col+1 */
    int A [],			/* row indices of A */
    int head [],		/* head of degree lists and hash buckets */
    int row_start,		/* pointer to set of columns to check */
    int row_length		/* number of columns to check */
)
{
    /* === Local variables ================================================== */

    int pos ;			/* index into A of the row entry being read */
    int row_end ;		/* index one past the last row entry */
    int col ;			/* a column index in the row to check */
    int bucket ;		/* hash number of col, selects the bucket */
    int list_head ;		/* head [bucket] as found on entry */
    int keeper ;		/* column that absorbs its duplicates */
    int keeper_len ;		/* length of column keeper */
    int cand ;			/* column compared against keeper */
    int before ;		/* column preceding cand in the hash bucket */
    int same ;			/* nonzero while keeper and cand still match */
    int k ;			/* position within the two columns */
    int *keeper_rows ;		/* row indices of column keeper */
    int *cand_rows ;		/* row indices of column cand */

    /* === Consider each column in the row ================================== */

    row_end = row_start + row_length ;
    for (pos = row_start ; pos < row_end ; pos++)
    {
	col = A [pos] ;
	if (COL_IS_DEAD (col))
	{
	    continue ;
	}

	/* get hash number for this column */
	bucket = Col [col].shared3.hash ;
	assert (bucket <= n_col) ;

	/* === Get the first column in this hash bucket ===================== */

	/* a non-empty degree list keeps the bucket head in its first */
	/* column; otherwise head [] itself holds it, encoded as -(c+2) */
	list_head = head [bucket] ;
	keeper = (list_head > EMPTY) ?
	    Col [list_head].shared3.headhash : - (list_head + 2) ;

	/* === Consider each column in the hash bucket ====================== */

	while (keeper != EMPTY)
	{
	    assert (COL_IS_ALIVE (keeper)) ;
	    assert (Col [keeper].shared3.hash == bucket) ;
	    keeper_len = Col [keeper].length ;

	    /* === Compare keeper with all columns after it ================= */

	    before = keeper ;
	    cand = Col [keeper].shared4.hash_next ;
	    while (cand != EMPTY)
	    {
		assert (cand != keeper) ;
		assert (COL_IS_ALIVE (cand)) ;
		assert (Col [cand].shared3.hash == bucket) ;

		/* not identical if lengths or scores are different */
		same = (Col [cand].length == keeper_len
		    && Col [cand].shared2.score == Col [keeper].shared2.score) ;

		if (same)
		{
		    /* compare the two columns entry by entry; the row */
		    /* indices are in the same order in both columns, so */
		    /* no gather/scatter is necessary */
		    keeper_rows = &A [Col [keeper].start] ;
		    cand_rows = &A [Col [cand].start] ;
		    for (k = 0 ; k < keeper_len ; k++)
		    {
			/* the columns are "clean" (no dead rows) */
			assert (ROW_IS_ALIVE (keeper_rows [k])) ;
			assert (ROW_IS_ALIVE (cand_rows [k])) ;
			if (keeper_rows [k] != cand_rows [k])
			{
			    break ;
			}
		    }
		    /* the columns differ if the scan stopped early */
		    same = (k == keeper_len) ;
		}

		if (same)
		{
		    /* === Got it!  two columns are identical =============== */

		    assert (Col [cand].shared2.score
			== Col [keeper].shared2.score) ;

		    Col [keeper].shared1.thickness +=
			Col [cand].shared1.thickness ;
		    Col [cand].shared1.parent = keeper ;
		    KILL_NON_PRINCIPAL_COL (cand) ;
		    /* order cand later, in order_children() */
		    Col [cand].shared2.order = EMPTY ;
		    /* remove cand from hash bucket */
		    Col [before].shared4.hash_next =
			Col [cand].shared4.hash_next ;
		}
		else
		{
		    /* cand stays in the bucket and precedes the next one */
		    before = cand ;
		}

		cand = Col [cand].shared4.hash_next ;
	    }

	    keeper = Col [keeper].shared4.hash_next ;
	}

	/* === Empty this hash bucket ======================================= */

	if (list_head > EMPTY)
	{
	    /* corresponding degree list "hash" is not empty */
	    Col [list_head].shared3.headhash = EMPTY ;
	}
	else
	{
	    /* corresponding degree list "hash" is empty */
	    head [bucket] = EMPTY ;
	}
    }
}


/* ========================================================================== */
/* === garbage_collection =================================================== */
/* ========================================================================== */

/*
    Defragments and compacts columns and rows in the workspace A.
Used when all available memory has been used while performing row merging. Returns the index of the first free position in A, after garbage collection. The time taken by this routine is linear is the size of the array A, which is itself linear in the number of nonzeros in the input matrix. Not user-callable. */ PRIVATE int garbage_collection /* returns the new value of pfree */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows */ int n_col, /* number of columns */ RowInfo Row [], /* row info */ ColInfo Col [], /* column info */ int A [], /* A [0 ... Alen-1] holds the matrix */ int *pfree /* &A [0] ... pfree is in use */ ) { /* === Local variables ================================================== */ int *psrc ; /* source pointer */ int *pdest ; /* destination pointer */ int j ; /* counter */ int r ; /* a row index */ int c ; /* a column index */ int length ; /* length of a row or column */ #ifndef NDEBUG int debug_rows ; DEBUG0 (("Defrag..\n")) ; for (psrc = &A[0] ; psrc < pfree ; psrc++) assert (*psrc >= 0) ; debug_rows = 0 ; #endif /* === Defragment the columns =========================================== */ pdest = &A[0] ; for (c = 0 ; c < n_col ; c++) { if (COL_IS_ALIVE (c)) { psrc = &A [Col [c].start] ; /* move and compact the column */ assert (pdest <= psrc) ; Col [c].start = (int) (pdest - &A [0]) ; length = Col [c].length ; for (j = 0 ; j < length ; j++) { r = *psrc++ ; if (ROW_IS_ALIVE (r)) { *pdest++ = r ; } } Col [c].length = (int) (pdest - &A [Col [c].start]) ; } } /* === Prepare to defragment the rows =================================== */ for (r = 0 ; r < n_row ; r++) { if (ROW_IS_ALIVE (r)) { if (Row [r].length == 0) { /* this row is of zero length. 
cannot compact it, so kill it */ DEBUG0 (("Defrag row kill\n")) ; KILL_ROW (r) ; } else { /* save first column index in Row [r].shared2.first_column */ psrc = &A [Row [r].start] ; Row [r].shared2.first_column = *psrc ; assert (ROW_IS_ALIVE (r)) ; /* flag the start of the row with the one's complement of row */ *psrc = ONES_COMPLEMENT (r) ; #ifndef NDEBUG debug_rows++ ; #endif } } } /* === Defragment the rows ============================================== */ psrc = pdest ; while (psrc < pfree) { /* find a negative number ... the start of a row */ if (*psrc++ < 0) { psrc-- ; /* get the row index */ r = ONES_COMPLEMENT (*psrc) ; assert (r >= 0 && r < n_row) ; /* restore first column index */ *psrc = Row [r].shared2.first_column ; assert (ROW_IS_ALIVE (r)) ; /* move and compact the row */ assert (pdest <= psrc) ; Row [r].start = (int) (pdest - &A [0]) ; length = Row [r].length ; for (j = 0 ; j < length ; j++) { c = *psrc++ ; if (COL_IS_ALIVE (c)) { *pdest++ = c ; } } Row [r].length = (int) (pdest - &A [Row [r].start]) ; #ifndef NDEBUG debug_rows-- ; #endif } } /* ensure we found all the rows */ assert (debug_rows == 0) ; /* === Return the new value of pfree ==================================== */ return ((int) (pdest - &A [0])) ; } /* ========================================================================== */ /* === clear_mark =========================================================== */ /* ========================================================================== */ /* Clears the Row [].shared2.mark array, and returns the new tag_mark. Return value is the new tag_mark. Not user-callable. */ PRIVATE int clear_mark /* return the new value for tag_mark */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows in A */ RowInfo Row [] /* Row [0 ... 
n_row-1].shared2.mark is set to zero */ ) { /* === Local variables ================================================== */ int r ; DEBUG0 (("Clear mark\n")) ; for (r = 0 ; r < n_row ; r++) { if (ROW_IS_ALIVE (r)) { Row [r].shared2.mark = 0 ; } } return (1) ; } /* ========================================================================== */ /* === debugging routines =================================================== */ /* ========================================================================== */ /* When debugging is disabled, the remainder of this file is ignored. */ #ifndef NDEBUG /* ========================================================================== */ /* === debug_structures ===================================================== */ /* ========================================================================== */ /* At this point, all empty rows and columns are dead. All live columns are "clean" (containing no dead rows) and simplicial (no supercolumns yet). Rows may contain dead columns, but all live rows contain at least one live column. 
*/

PRIVATE void debug_structures
(
    /* === Parameters ======================================================= */

    int n_row,
    int n_col,
    RowInfo Row [],
    ColInfo Col [],
    int A [],
    int n_col2
)
{
    /* === Local variables ================================================== */

    int c ;			/* a column index */
    int r ;			/* a row index */
    int k ;			/* position within a row or column */
    int len ;			/* length of the row or column */
    int score ;			/* score of a live column */
    int deg ;			/* degree of a live row */
    int order ;			/* order assigned to a dead column */
    int live_cols ;		/* live columns counted in a row */

    /* === Check A, Row, and Col ============================================ */

    for (c = 0 ; c < n_col ; c++)
    {
	if (COL_IS_ALIVE (c))
	{
	    len = Col [c].length ;
	    score = Col [c].shared2.score ;
	    DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ;
	    assert (len > 0) ;
	    assert (score >= 0) ;
	    assert (Col [c].shared1.thickness == 1) ;

	    /* every row listed in a live column must itself be alive */
	    for (k = 0 ; k < len ; k++)
	    {
		r = A [Col [c].start + k] ;
		assert (ROW_IS_ALIVE (r)) ;
	    }
	}
	else
	{
	    /* a dead column must hold an order in n_col2 ... n_col-1 */
	    order = Col [c].shared2.order ;
	    assert (order >= n_col2 && order < n_col) ;
	}
    }

    for (r = 0 ; r < n_row ; r++)
    {
	if (ROW_IS_ALIVE (r))
	{
	    len = Row [r].length ;
	    deg = Row [r].shared1.degree ;
	    assert (len > 0) ;
	    assert (deg > 0) ;

	    /* count the live columns of this row; there must be one */
	    live_cols = 0 ;
	    for (k = 0 ; k < len ; k++)
	    {
		c = A [Row [r].start + k] ;
		if (COL_IS_ALIVE (c))
		{
		    live_cols++ ;
		}
	    }
	    assert (live_cols > 0) ;
	}
    }
}


/* ========================================================================== */
/* === debug_deg_lists ====================================================== */
/* ========================================================================== */

/*
    Prints the contents of the degree lists.  Counts the number of columns
    in the degree list and compares it to the total it should have.  Also
    checks the row degrees.
*/

PRIVATE void debug_deg_lists
(
    /* === Parameters ======================================================= */

    int n_row,
    int n_col,
    RowInfo Row [],
    ColInfo Col [],
    int head [],
    int min_score,
    int should,
    int max_deg
)
{
    /* === Local variables ================================================== */

    int bucket ;		/* degree list being walked */
    int c ;			/* a column in the current degree list */
    int r ;			/* a row index */
    int total ;			/* summed thickness of all listed columns */

    /* === Check the degree lists =========================================== */

    /* skip the check on large problems unless debugging is turned up */
    if (n_col > 10000 && debug_colamd <= 0)
    {
	return ;
    }

    total = 0 ;
    DEBUG4 (("Degree lists: %d\n", min_score)) ;
    for (bucket = 0 ; bucket <= n_col ; bucket++)
    {
	if (head [bucket] != EMPTY)
	{
	    DEBUG4 (("%d:", bucket)) ;
	    for (c = head [bucket] ; c != EMPTY ;
		c = Col [c].shared4.degree_next)
	    {
		DEBUG4 ((" %d", c)) ;
		total += Col [c].shared1.thickness ;
		assert (COL_IS_ALIVE (c)) ;
	    }
	    DEBUG4 (("\n")) ;
	}
    }
    DEBUG4 (("should %d have %d\n", should, total)) ;
    assert (should == total) ;

    /* === Check the row degrees ============================================ */

    if (n_row > 10000 && debug_colamd <= 0)
    {
	return ;
    }

    /* no live row may have a degree above max_deg */
    r = 0 ;
    while (r < n_row)
    {
	if (ROW_IS_ALIVE (r))
	{
	    assert (Row [r].shared1.degree <= max_deg) ;
	}
	r++ ;
    }
}


/* ========================================================================== */
/* === debug_mark =========================================================== */
/* ========================================================================== */

/*
    Ensures that the tag_mark is less that the maximum and also ensures that
    each entry in the mark array is less than the tag mark.
*/ PRIVATE void debug_mark ( /* === Parameters ======================================================= */ int n_row, RowInfo Row [], int tag_mark, int max_mark ) { /* === Local variables ================================================== */ int r ; /* === Check the Row marks ============================================== */ assert (tag_mark > 0 && tag_mark <= max_mark) ; if (n_row > 10000 && debug_colamd <= 0) { return ; } for (r = 0 ; r < n_row ; r++) { assert (Row [r].shared2.mark < tag_mark) ; } } /* ========================================================================== */ /* === debug_matrix ========================================================= */ /* ========================================================================== */ /* Prints out the contents of the columns and the rows. */ PRIVATE void debug_matrix ( /* === Parameters ======================================================= */ int n_row, int n_col, RowInfo Row [], ColInfo Col [], int A [] ) { /* === Local variables ================================================== */ int r ; int c ; int *rp ; int *rp_end ; int *cp ; int *cp_end ; /* === Dump the rows and columns of the matrix ========================== */ if (debug_colamd < 3) { return ; } DEBUG3 (("DUMP MATRIX:\n")) ; for (r = 0 ; r < n_row ; r++) { DEBUG3 (("Row %d alive? %d\n", r, ROW_IS_ALIVE (r))) ; if (ROW_IS_DEAD (r)) { continue ; } DEBUG3 (("start %d length %d degree %d\n", Row [r].start, Row [r].length, Row [r].shared1.degree)) ; rp = &A [Row [r].start] ; rp_end = rp + Row [r].length ; while (rp < rp_end) { c = *rp++ ; DEBUG3 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ; } } for (c = 0 ; c < n_col ; c++) { DEBUG3 (("Col %d alive? 
%d\n", c, COL_IS_ALIVE (c))) ; if (COL_IS_DEAD (c)) { continue ; } DEBUG3 (("start %d length %d shared1 %d shared2 %d\n", Col [c].start, Col [c].length, Col [c].shared1.thickness, Col [c].shared2.score)) ; cp = &A [Col [c].start] ; cp_end = cp + Col [c].length ; while (cp < cp_end) { r = *cp++ ; DEBUG3 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ; } } } #endif SuperLU_DIST_5.3.0/SRC/old_colamd.h0000644013363400111340000000557013233431301015540 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief colamd include file */ /* ========================================================================== */ /* === colamd prototypes and definitions ==================================== */ /* ========================================================================== */ /* This is the colamd include file, http://www.cise.ufl.edu/~davis/colamd/colamd.h for use in the colamd.c, colamdmex.c, and symamdmex.c files located at http://www.cise.ufl.edu/~davis/colamd/ See those files for a description of colamd and symamd, and for the copyright notice, which also applies to this file. August 3, 1998. Version 1.0. */ /* ========================================================================== */ /* === Definitions ========================================================== */ /* ========================================================================== */ /* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ #define COLAMD_KNOBS 20 /* number of output statistics. Only A [0..2] are currently used. */ #define COLAMD_STATS 20 /* knobs [0] and A [0]: dense row knob and output statistic. */ #define COLAMD_DENSE_ROW 0 /* knobs [1] and A [1]: dense column knob and output statistic. 
*/ #define COLAMD_DENSE_COL 1 /* A [2]: memory defragmentation count output statistic */ #define COLAMD_DEFRAG_COUNT 2 /* A [3]: whether or not the input columns were jumbled or had duplicates */ #define COLAMD_JUMBLED_COLS 3 /* ========================================================================== */ /* === Prototypes of user-callable routines ================================= */ /* ========================================================================== */ #ifdef _CRAY #define int short #elif defined (_LONGINT) #define int long #endif int colamd_recommended /* returns recommended value of Alen */ ( int nnz, /* nonzeros in A */ int n_row, /* number of rows in A */ int n_col /* number of columns in A */ ) ; void colamd_set_defaults /* sets default parameters */ ( /* knobs argument is modified on output */ double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ ) ; int colamd /* returns TRUE if successful, FALSE otherwise*/ ( /* A and p arguments are modified on output */ int n_row, /* number of rows in A */ int n_col, /* number of columns in A */ int Alen, /* size of the array A */ int A [], /* row indices of A, of size Alen */ int p [], /* column pointers of A, of size n_col+1 */ double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ ) ; SuperLU_DIST_5.3.0/SRC/pdgstrs_Bglobal.c0000644013363400111340000007531313233431301016550 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factorization * *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 *     October 15, 2008  use fewer MPI_Reduce
 * 
*/ #include "superlu_ddefs.h" #define ISEND_IRECV /* * Function prototypes */ #ifdef _CRAY fortran void STRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, double*, double*, int*, double*, int*); fortran void SGEMM(_fcd, _fcd, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif static void gather_diag_to_all(int_t, int_t, double [], Glu_persist_t *, LocalLU_t *, gridinfo_t *, int_t, int_t [], int_t [], double [], int_t, double []); /*! \brief * *
 * Purpose
 * =======
 *
 * pdgstrs_Bglobal solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by pdgstrf.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pdgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) double*
 *        On entry, the right-hand side matrix of the possibly equilibrated
 *        and row permuted system.
 *        On exit, the solution matrix of the possibly equilibrated
 *        and row permuted system if info = 0;
 *
 *        NOTE: Currently, the N-by-NRHS  matrix B must reside on all 
 *              processes when calling this routine.
 *
 * ldb    (input) int (global)
 *        Leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pdgstrs_Bglobal(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, double *B, int_t ldb, int nrhs, SuperLUStat_t *stat, int *info) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; double alpha = 1.0; double *lsum; /* Local running sum of the updates to B-components */ double *x; /* X component at step k. */ double *lusup, *dest; double *recvbuf, *tempv; double *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */ Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */ int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */ int_t kcol, krow, mycol, myrow; int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int Pc, Pr, iam; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; double **Lnzval_bc_ptr; MPI_Status status; #if defined (ISEND_IRECV) || defined (BSEND) MPI_Request *send_req, recv_req; #endif /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */ int_t nleaf = 0, nroot = 0; /*-- Counts used for U-solve --*/ int_t *bmod; /* Modification count for L-solve. */ int_t **bsendx_plist = Llu->bsendx_plist; int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */ int_t *brecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. 
*/ double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */ t = SuperLU_timer_(); /* Test input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -9; if ( *info ) { pxerr_dist("PDGSTRS_BGLOBAL", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ stat->ops[SOLVE] = 0.0; Llu->SolveMsgSent = 0; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrs_Bglobal()"); #endif /* Save the count to be altered so it can be used by subsequent call to PDGSTRS_BGLOBAL. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; #if defined (ISEND_IRECV) || defined (BSEND) k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb; if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); #endif #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Obtain ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. 
*/ knsupc = sp_ienv_dist(3); maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H ); if ( !(lsum = doubleCalloc_dist(((size_t)ldalsum) * nrhs + nlb * LSUM_H)) ) ABORT("Calloc fails for lsum[]."); if ( !(x = doubleMalloc_dist(((size_t)ldalsum) * nrhs + nlb * XK_H)) ) ABORT("Malloc fails for x[]."); if ( !(recvbuf = doubleMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doubleCalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. *---------------------------------------------------*/ /* * Copy B into X on the diagonal processes. */ ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H] = k; /* Block number prepended in the header. */ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* Diagonal process. */ jj = X_BLK( lk ); x[jj - XK_H] = k; /* Block number prepended in the header. */ RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) /* X is stored in blocks. */ x[i + jj + j*knsupc] = B[i + ii + j*ldb]; } } ii += knsupc; } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); if ( mycol != kcol && fmod[lk] ) mod_bit[lk] = 1; /* contribution from off-diagonal */ } } /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, frecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* Diagonal process. 
*/ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && fmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) frecv[%4d] %2d\n", iam, k, frecv[lk]); assert( frecv[lk] < Pc ); #endif } } } #endif } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. --------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( frecv[lk]==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs; --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req,stat); } } /* if diagonal process ... */ } /* for k ... */ /* ----------------------------------------------------------- Compute the internal nodes asynchronously by all processes. ----------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. */ #ifdef ISEND_IRECV /* -MPI- FATAL: Remote protocol queue full */ MPI_Irecv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &recv_req ); MPI_Wait( &recv_req, &status ); #else MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); #endif k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ dlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: /* Receiver must be a diagonal process */ --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications. 
*/ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( DEBUGlevel>=2 ) printf("\n(%d) .. After L-solve: y =\n", iam); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); } MPI_Barrier( grid->comm ); } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); Llu->SolveMsgSent = 0; #endif /*--------------------------------------------------- * Back solve Ux = y. * * The Y components from the forward solve is already * on the diagonal processes. *---------------------------------------------------*/ /* Save the count to be altered so it can be used by subsequent call to PDGSTRS_BGLOBAL. */ if ( !(bmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for bmod[]."); for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i]; if ( !(brecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for brecv[]."); Llu->brecv = brecv; /* * Compute brecv[] and nbrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. 
*/ if ( mycol != kcol && bmod[lk] ) mod_bit[lk] = 1; /* Contribution from off-diagonal */ } } /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, brecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #endif } /* Re-initialize lsum to zero. Each block header is already in place. */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { knsupc = SuperSize( k ); lk = LBi( k, grid ); il = LSUM_BLK( lk ); dest = &lsum[il]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = 0.0; } } /* Set up additional pointers for the index and value arrays of U. nub is the number of local block columns. */ nub = CEILING( nsupers, Pc ); /* Number of local block columns. */ if ( !(Urbs = (int_t *) intCalloc_dist(2*((size_t)nub))) ) ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero blocks in a block column. 
*/ Urbs1 = Urbs + nub; if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) ) ABORT("Malloc fails for Ucb_indptr[]"); if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) ) ABORT("Malloc fails for Ucb_valptr[]"); /* Count number of row blocks in a block column. One pass of the skeleton graph of U. */ for (lk = 0; lk < nlb; ++lk) { usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ /* usub[0] -- number of column blocks in this block row. */ #if ( DEBUGlevel>=2 ) Ublocks += usub[0]; #endif i = BR_HEADER; /* Pointer in index array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number */ ++Urbs[LBj(k,grid)]; i += UB_DESCRIPTOR + SuperSize( k ); } } } /* Set up the vertical linked lists for the row blocks. One pass of the skeleton graph of U. */ for (lb = 0; lb < nub; ++lb) { if ( Urbs[lb] ) { /* Not an empty block column. */ if ( !(Ucb_indptr[lb] = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) ) ABORT("Malloc fails for Ucb_indptr[lb][]"); if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) ) ABORT("Malloc fails for Ucb_valptr[lb][]"); } } for (lk = 0; lk < nlb; ++lk) { /* For each block row. */ usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ i = BR_HEADER; /* Pointer in index array. */ j = 0; /* Pointer in nzval array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number, column-wise. */ ljb = LBj( k, grid ); /* Local block number, column-wise. */ Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk; Ucb_indptr[ljb][Urbs1[ljb]].indpos = i; Ucb_valptr[ljb][Urbs1[ljb]] = j; ++Urbs1[ljb]; j += usub[i+1]; i += UB_DESCRIPTOR + SuperSize( k ); } } } #if ( DEBUGlevel>=2 ) for (p = 0; p < Pr*Pc; ++p) { if (iam == p) { printf("(%2d) .. 
Ublocks %d\n", iam, Ublocks); for (lb = 0; lb < nub; ++lb) { printf("(%2d) Local col %2d: # row blocks %2d\n", iam, lb, Urbs[lb]); if ( Urbs[lb] ) { for (i = 0; i < Urbs[lb]; ++i) printf("(%2d) .. row blk %2d:\ lbnum %d, indpos %d, valpos %d\n", iam, i, Ucb_indptr[lb][i].lbnum, Ucb_indptr[lb][i].indpos, Ucb_valptr[lb][i]); } } } MPI_Barrier( grid->comm ); } for (p = 0; p < Pr*Pc; ++p) { if ( iam == p ) { printf("\n(%d) bsendx_plist[][]", iam); for (lb = 0; lb < nub; ++lb) { printf("\n(%d) .. local col %2d: ", iam, lb); for (i = 0; i < Pr; ++i) printf("%4d", bsendx_plist[lb][i]); } printf("\n"); } MPI_Barrier( grid->comm ); } #endif /* DEBUGlevel */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Setup U-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif /* * Solve the roots first by all the diagonal processes. */ #if ( DEBUGlevel>=2 ) printf("(%2d) nroot %4d\n", iam, nroot); #endif for (k = nsupers-1; k >= 0 && nroot; --k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process. */ knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number, row-wise. */ if ( brecv[lk]==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs; --nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if root ... */ } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. */ while ( nbrecvx || nbrecvmod ) { /* While not finished. */ /* Receive a message. */ MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nbrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ dlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); break; case LSUM: /* Receiver must be a diagonal process */ --nbrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; if ( (--brecv[lk])==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii - XK_H], pi); #endif } } /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if becomes solvable */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. U-solve time\t%8.2f\n", t); #endif /* Copy the solution X into B (on all processes). 
*/ { int_t num_diag_procs, *diag_procs, *diag_len; double *work; get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX(jj, diag_len[j]); if ( !(work = doubleMalloc_dist(((size_t)jj)*nrhs)) ) ABORT("Malloc fails for work[]"); gather_diag_to_all(n, nrhs, x, Glu_persist, Llu, grid, num_diag_procs, diag_procs, diag_len, B, ldb, work); SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); SUPERLU_FREE(work); } /* Deallocate storage. */ SUPERLU_FREE(lsum); SUPERLU_FREE(x); SUPERLU_FREE(recvbuf); for (i = 0; i < nub; ++i) if ( Urbs[i] ) { SUPERLU_FREE(Ucb_indptr[i]); SUPERLU_FREE(Ucb_valptr[i]); } SUPERLU_FREE(Ucb_indptr); SUPERLU_FREE(Ucb_valptr); SUPERLU_FREE(Urbs); SUPERLU_FREE(bmod); SUPERLU_FREE(brecv); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); SUPERLU_FREE(send_req); #endif #ifdef BSEND SUPERLU_FREE(send_req); #endif stat->utime[SOLVE] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrs_Bglobal()"); #endif } /* PDGSTRS_BGLOBAL */ /* * Gather the components of x vector on the diagonal processes * onto all processes, and combine them into the global vector y. */ static void gather_diag_to_all(int_t n, int_t nrhs, double x[], Glu_persist_t *Glu_persist, LocalLU_t *Llu, gridinfo_t *grid, int_t num_diag_procs, int_t diag_procs[], int_t diag_len[], double y[], int_t ldy, double work[]) { int_t i, ii, j, k, lk, lwork, nsupers, p; int_t *ilsum, *xsup; int iam, knsupc, pkk; double *x_col, *y_col; iam = grid->iam; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; ilsum = Llu->ilsum; for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { /* Copy x vector into a buffer. 
*/ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); /*ilsum[lk] + (lk+1)*XK_H;*/ x_col = &x[ii]; for (j = 0; j < nrhs; ++j) { for (i = 0; i < knsupc; ++i) work[i+lwork] = x_col[i]; lwork += knsupc; x_col += knsupc; } } MPI_Bcast( work, lwork, MPI_DOUBLE, pkk, grid->comm ); } else { MPI_Bcast( work, diag_len[p]*nrhs, MPI_DOUBLE, pkk, grid->comm ); } /* Scatter work[] into global y vector. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); ii = FstBlockC( k ); y_col = &y[ii]; for (j = 0; j < nrhs; ++j) { for (i = 0; i < knsupc; ++i) y_col[i] = work[i+lwork]; lwork += knsupc; y_col += ldy; } } } } /* GATHER_DIAG_TO_ALL */ SuperLU_DIST_5.3.0/SRC/pzsymbfact_distdata.c0000644013363400111340000017504113233431301017476 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Redistribute the symbolic structure of L and U from the distribution * *
 * -- Parallel symbolic factorization auxiliary routine (version 2.3) --
 * -- Distributes the data from parallel symbolic factorization 
 * -- to numeric factorization
 * INRIA France -  July 1, 2004
 * Laura Grigori
 *
 * November 1, 2007
 * February 20, 2008
 * October 15, 2008
 * 
*/ /* limits.h: the largest positive integer (INT_MAX) */ #include #include "superlu_zdefs.h" #include "psymbfact.h" /*! \brief * *
 * Purpose
 * =======
 * 
 * Redistribute the symbolic structure of L and U from the distribution
 * used in the parallel symbolic factorization step to the distribution
 * used in the parallel numeric factorization step.  On exit, the L and U
 * structure for the 2D distribution used in the numeric factorization step is
 * stored in p_xlsub, p_lsub, p_xusub, p_usub.  The global supernodal 
 * information is also computed and it is stored in Glu_persist->supno
 * and Glu_persist->xsup.
 *
 * This routine allocates memory for storing the structure of L and U
 * and the supernodes information.  This represents the arrays:
 * p_xlsub, p_lsub, p_xusub, p_usub,
 * Glu_persist->supno,  Glu_persist->xsup.
 *
 * This routine also deallocates memory allocated during symbolic 
 * factorization routine.  That is, the following arrays are freed:
 * Pslu_freeable->xlsub,  Pslu_freeable->lsub, 
 * Pslu_freeable->xusub, Pslu_freeable->usub, 
 * Pslu_freeable->globToLoc, Pslu_freeable->supno_loc, 
 * Pslu_freeable->xsup_beg_loc, Pslu_freeable->xsup_end_loc.
 *
 * Arguments
 * =========
 *
 * n      (Input) int_t
 *        Order of the input matrix
 * Pslu_freeable  (Input) Pslu_freeable_t *
 *        Local L and U structure, 
 *        global to local indexing information.
 * 
 * Glu_persist (Output) Glu_persist_t *
 *        Stores on output the information on supernodes mapping.
 * 
 * p_xlsub (Output) int_t **
 *         Pointer to structure of L distributed on a 2D grid 
 *         of processors, stored by columns.
 * 
 * p_lsub  (Output) int_t **
 *         Structure of L distributed on a 2D grid of processors, 
 *         stored by columns.
 *
 * p_xusub (Output) int_t **
 *         Pointer to structure of U distributed on a 2D grid 
 *         of processors, stored by rows.
 * 
 * p_usub  (Output) int_t **
 *         Structure of U distributed on a 2D grid of processors, 
 *         stored by rows.
 * 
 * grid   (Input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Return value
 * ============
 *   < 0, number of bytes allocated on return from the dist_symbLU.
 *   > 0, number of bytes allocated in this routine when out of memory.
 *        (an approximation).
 * 
*/ static float dist_symbLU (int_t n, Pslu_freeable_t *Pslu_freeable, Glu_persist_t *Glu_persist, int_t **p_xlsub, int_t **p_lsub, int_t **p_xusub, int_t **p_usub, gridinfo_t *grid ) { int iam, nprocs, pc, pr, p, np, p_diag; int_t *nnzToSend, *nnzToRecv, *nnzToSend_l, *nnzToSend_u, *tmp_ptrToSend, *mem; int_t *nnzToRecv_l, *nnzToRecv_u; int_t *send_1, *send_2, nsend_1, nsend_2; int_t *ptrToSend, *ptrToRecv, sendL, sendU, *snd_luind, *rcv_luind; int_t nsupers, nsupers_i, nsupers_j; int *nvtcs, *intBuf1, *intBuf2, *intBuf3, *intBuf4, intNvtcs_loc; int_t maxszsn, maxNvtcsPProc; int_t *xsup_n, *supno_n, *temp, *xsup_beg_s, *xsup_end_s, *supno_s; int_t *xlsub_s, *lsub_s, *xusub_s, *usub_s; int_t *xlsub_n, *lsub_n, *xusub_n, *usub_n; int_t *xsub_s, *sub_s, *xsub_n, *sub_n; int_t *globToLoc, nvtcs_loc; int_t SendCnt_l, SendCnt_u, nnz_loc_l, nnz_loc_u, nnz_loc, RecvCnt_l, RecvCnt_u, ind_loc; int_t i, k, j, gb, szsn, gb_n, gb_s, gb_l, fst_s, fst_s_l, lst_s, i_loc; int_t nelts, isize; float memAux; /* Memory used during this routine and freed on return */ float memRet; /* Memory allocated and not freed on return */ int_t iword, dword; /* ------------------------------------------------------------ INITIALIZATION. 
------------------------------------------------------------*/ iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dist_symbLU()"); #endif nprocs = (int) grid->nprow * grid->npcol; xlsub_s = Pslu_freeable->xlsub; lsub_s = Pslu_freeable->lsub; xusub_s = Pslu_freeable->xusub; usub_s = Pslu_freeable->usub; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; globToLoc = Pslu_freeable->globToLoc; nvtcs_loc = Pslu_freeable->nvtcs_loc; xsup_beg_s = Pslu_freeable->xsup_beg_loc; xsup_end_s = Pslu_freeable->xsup_end_loc; supno_s = Pslu_freeable->supno_loc; rcv_luind = NULL; iword = sizeof(int_t); dword = sizeof(doublecomplex); memAux = 0.; memRet = 0.; mem = intCalloc_dist(12 * nprocs); if (!mem) return (ERROR_RET); memAux = (float) (12 * nprocs * sizeof(int_t)); nnzToRecv = mem; nnzToSend = nnzToRecv + 2*nprocs; nnzToSend_l = nnzToSend + 2 * nprocs; nnzToSend_u = nnzToSend_l + nprocs; send_1 = nnzToSend_u + nprocs; send_2 = send_1 + nprocs; tmp_ptrToSend = send_2 + nprocs; nnzToRecv_l = tmp_ptrToSend + nprocs; nnzToRecv_u = nnzToRecv_l + nprocs; ptrToSend = nnzToSend; ptrToRecv = nnzToSend + nprocs; nvtcs = (int *) SUPERLU_MALLOC(5 * nprocs * sizeof(int)); intBuf1 = nvtcs + nprocs; intBuf2 = nvtcs + 2 * nprocs; intBuf3 = nvtcs + 3 * nprocs; intBuf4 = nvtcs + 4 * nprocs; memAux += 5 * nprocs * sizeof(int); maxszsn = sp_ienv_dist(3); /* Allocate space for storing Glu_persist_n. 
*/ if ( !(supno_n = intMalloc_dist(n+1)) ) { fprintf (stderr, "Malloc fails for supno_n[]."); return (memAux); } memRet += (float) ((n+1) * sizeof(int_t)); /* ------------------------------------------------------------ DETERMINE SUPERNODES FOR NUMERICAL FACTORIZATION ------------------------------------------------------------*/ if (nvtcs_loc > INT_MAX) ABORT("ERROR in dist_symbLU nvtcs_loc > INT_MAX\n"); intNvtcs_loc = (int) nvtcs_loc; MPI_Gather (&intNvtcs_loc, 1, MPI_INT, nvtcs, 1, MPI_INT, 0, grid->comm); if (!iam) { /* set ptrToRecv to point to the beginning of the data for each processor */ for (k = 0, p = 0; p < nprocs; p++) { ptrToRecv[p] = k; k += nvtcs[p]; } } if (nprocs > 1) { temp = NULL; if (!iam ) { if ( !(temp = intMalloc_dist (n+1)) ) { fprintf (stderr, "Malloc fails for temp[]."); return (memAux + memRet); } memAux += (float) (n+1) * iword; } #if defined (_LONGINT) for (p=0; p INT_MAX) ABORT("ERROR in dist_symbLU size to send > INT_MAX\n"); intBuf1[p] = (int) ptrToRecv[p]; } #else /* Default */ intBuf1 = ptrToRecv; #endif MPI_Gatherv (supno_s, (int) nvtcs_loc, mpi_int_t, temp, nvtcs, intBuf1, mpi_int_t, 0, grid->comm); } else temp = supno_s; if (!iam) { nsupers = 0; p = (int) OWNER( globToLoc[0] ); gb = temp[ptrToRecv[p]]; supno_n[0] = nsupers; ptrToRecv[p] ++; szsn = 1; for (j = 1; j < n; j ++) { if (p != (int) OWNER( globToLoc[j] ) || szsn >= maxszsn || gb != temp[ptrToRecv[p]]) { nsupers ++; p = (int) OWNER( globToLoc[j] ); gb = temp[ptrToRecv[p]]; szsn = 1; } else { szsn ++; } ptrToRecv[p] ++; supno_n[j] = nsupers; } nsupers++; if (nprocs > 1) { SUPERLU_FREE (temp); memAux -= (float) (n+1) * iword; } supno_n[n] = nsupers; } /* reset to 0 nnzToSend */ for (p = 0; p < 2 *nprocs; p++) nnzToSend[p] = 0; MPI_Bcast (supno_n, n+1, mpi_int_t, 0, grid->comm); nsupers = supno_n[n]; /* Allocate space for storing Glu_persist_n. 
*/ if ( !(xsup_n = intMalloc_dist(nsupers+1)) ) { fprintf (stderr, "Malloc fails for xsup_n[]."); return (memAux + memRet); } memRet += (float) (nsupers+1) * iword; /* ------------------------------------------------------------ COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS, THEN ALLOCATE SPACE. THIS ACCOUNTS FOR THE FIRST PASS OF L and U. ------------------------------------------------------------*/ gb = EMPTY; for (i = 0; i < n; i++) { if (gb != supno_n[i]) { /* a new supernode starts */ gb = supno_n[i]; xsup_n[gb] = i; } } xsup_n[nsupers] = n; for (p = 0; p < nprocs; p++) { send_1[p] = FALSE; send_2[p] = FALSE; } for (gb_n = 0; gb_n < nsupers; gb_n ++) { i = xsup_n[gb_n]; if (iam == (int) OWNER( globToLoc[i] )) { pc = PCOL( gb_n, grid ); pr = PROW( gb_n, grid ); p_diag = PNUM( pr, pc, grid); i_loc = LOCAL_IND( globToLoc[i] ); gb_s = supno_s[i_loc]; fst_s = xsup_beg_s[gb_s]; lst_s = xsup_end_s[gb_s]; fst_s_l = LOCAL_IND( globToLoc[fst_s] ); for (j = xlsub_s[fst_s_l]; j < xlsub_s[fst_s_l+1]; j++) { k = lsub_s[j]; if (k >= i) { gb = supno_n[k]; p = (int) PNUM( PROW(gb, grid), pc, grid ); nnzToSend[2*p] ++; send_1[p] = TRUE; } } for (j = xusub_s[fst_s_l]; j < xusub_s[fst_s_l+1]; j++) { k = usub_s[j]; if (k >= i + xsup_n[gb_n+1] - xsup_n[gb_n]) { gb = supno_n[k]; p = PNUM( pr, PCOL(gb, grid), grid); nnzToSend[2*p+1] ++; send_2[p] = TRUE; } } nsend_2 = 0; for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++) { nnzToSend[2*p+1] += 2; if (send_2[p]) nsend_2 ++; } for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++) if (send_2[p] || p == p_diag) { if (p == p_diag && !send_2[p]) nnzToSend[2*p+1] += nsend_2; else nnzToSend[2*p+1] += nsend_2-1; send_2[p] = FALSE; } nsend_1 = 0; for (p = pc; p < nprocs; p += grid->npcol) { nnzToSend[2*p] += 2; if (send_1[p]) nsend_1 ++; } for (p = pc; p < nprocs; p += grid->npcol) if (send_1[p]) { nnzToSend[2*p] += nsend_1-1; send_1[p] = FALSE; } else nnzToSend[2*p] += nsend_1; } } /* All-to-all communication */ 
MPI_Alltoall( nnzToSend, 2, mpi_int_t, nnzToRecv, 2, mpi_int_t, grid->comm); nnz_loc_l = nnz_loc_u = 0; SendCnt_l = SendCnt_u = RecvCnt_l = RecvCnt_u = 0; for (p = 0; p < nprocs; p++) { if ( p != iam ) { SendCnt_l += nnzToSend[2*p]; nnzToSend_l[p] = nnzToSend[2*p]; SendCnt_u += nnzToSend[2*p+1]; nnzToSend_u[p] = nnzToSend[2*p+1]; RecvCnt_l += nnzToRecv[2*p]; nnzToRecv_l[p] = nnzToRecv[2*p]; RecvCnt_u += nnzToRecv[2*p+1]; nnzToRecv_u[p] = nnzToRecv[2*p+1]; } else { nnz_loc_l += nnzToRecv[2*p]; nnz_loc_u += nnzToRecv[2*p+1]; nnzToSend_l[p] = 0; nnzToSend_u[p] = 0; nnzToRecv_l[p] = nnzToRecv[2*p]; nnzToRecv_u[p] = nnzToRecv[2*p+1]; } } /* Allocate space for storing the symbolic structure after redistribution. */ nsupers_i = CEILING( nsupers, grid->nprow ); /* Number of local block rows */ nsupers_j = CEILING( nsupers, grid->npcol ); /* Number of local block columns */ if ( !(xlsub_n = intCalloc_dist(nsupers_j+1)) ) { fprintf (stderr, "Malloc fails for xlsub_n[]."); return (memAux + memRet); } memRet += (float) (nsupers_j+1) * iword; if ( !(xusub_n = intCalloc_dist(nsupers_i+1)) ) { fprintf (stderr, "Malloc fails for xusub_n[]."); return (memAux + memRet); } memRet += (float) (nsupers_i+1) * iword; /* Allocate temp storage for sending/receiving the L/U symbolic structure. 
*/ if ( (RecvCnt_l + nnz_loc_l) || (RecvCnt_u + nnz_loc_u) ) { if (!(rcv_luind = intMalloc_dist(SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u))) ) { fprintf (stderr, "Malloc fails for rcv_luind[]."); return (memAux + memRet); } memAux += (float) SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u) * iword; } if ( nprocs > 1 && (SendCnt_l || SendCnt_u) ) { if (!(snd_luind = intMalloc_dist(SUPERLU_MAX(SendCnt_l, SendCnt_u))) ) { fprintf (stderr, "Malloc fails for index[]."); return (memAux + memRet); } memAux += (float) SUPERLU_MAX(SendCnt_l, SendCnt_u) * iword; } /* ------------------------------------------------------------------ LOAD THE SYMBOLIC STRUCTURE OF L AND U INTO THE STRUCTURES TO SEND. THIS ACCOUNTS FOR THE SECOND PASS OF L and U. ------------------------------------------------------------------*/ sendL = TRUE; sendU = FALSE; while (sendL || sendU) { if (sendL) { xsub_s = xlsub_s; sub_s = lsub_s; xsub_n = xlsub_n; nnzToSend = nnzToSend_l; nnzToRecv = nnzToRecv_l; } if (sendU) { xsub_s = xusub_s; sub_s = usub_s; xsub_n = xusub_n; nnzToSend = nnzToSend_u; nnzToRecv = nnzToRecv_u; } for (i = 0, j = 0, p = 0; p < nprocs; p++) { if ( p != iam ) { ptrToSend[p] = i; i += nnzToSend[p]; } ptrToRecv[p] = j; j += nnzToRecv[p]; } nnzToRecv[iam] = 0; ind_loc = ptrToRecv[iam]; for (gb_n = 0; gb_n < nsupers; gb_n++) { nsend_2 = 0; i = xsup_n[gb_n]; if (iam == OWNER( globToLoc[i] )) { pc = PCOL( gb_n, grid ); pr = PROW( gb_n, grid ); p_diag = PNUM( pr, pc, grid ); i_loc = LOCAL_IND( globToLoc[i] ); gb_s = supno_s[i_loc]; fst_s = xsup_beg_s[gb_s]; lst_s = xsup_end_s[gb_s]; fst_s_l = LOCAL_IND( globToLoc[fst_s] ); if (sendL) { p = pc; np = grid->nprow; } else { p = pr * grid->npcol; np = grid->npcol; } for (j = 0; j < np; j++) { if (p == iam) { rcv_luind[ind_loc] = gb_n; rcv_luind[ind_loc+1] = 0; tmp_ptrToSend[p] = ind_loc + 1; ind_loc += 2; } else { snd_luind[ptrToSend[p]] = gb_n; snd_luind[ptrToSend[p]+1] = 0; tmp_ptrToSend[p] = ptrToSend[p] + 1; ptrToSend[p] 
+= 2; } if (sendL) p += grid->npcol; if (sendU) p++; } for (j = xsub_s[fst_s_l]; j < xsub_s[fst_s_l+1]; j++) { k = sub_s[j]; if ((sendL && k >= i) || (sendU && k >= i + xsup_n[gb_n+1] - xsup_n[gb_n])) { gb = supno_n[k]; if (sendL) p = PNUM( PROW(gb, grid), pc, grid ); else p = PNUM( pr, PCOL(gb, grid), grid); if (send_1[p] == FALSE) { send_1[p] = TRUE; send_2[nsend_2] = k; nsend_2 ++; } if (p == iam) { rcv_luind[ind_loc] = k; ind_loc++; if (sendL) xsub_n[LBj( gb_n, grid )] ++; else xsub_n[LBi( gb_n, grid )] ++; } else { snd_luind[ptrToSend[p]] = k; ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++; } } } if (sendL) for (p = pc; p < nprocs; p += grid->npcol) { for (k = 0; k < nsend_2; k++) { gb = supno_n[send_2[k]]; if (PNUM(PROW(gb, grid), pc, grid) != p) { if (p == iam) { rcv_luind[ind_loc] = send_2[k]; ind_loc++; xsub_n[LBj( gb_n, grid )] ++; } else { snd_luind[ptrToSend[p]] = send_2[k]; ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++; } } } send_1[p] = FALSE; } if (sendU) for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++) { if (send_1[p] || p == p_diag) { for (k = 0; k < nsend_2; k++) { gb = supno_n[send_2[k]]; if(PNUM( pr, PCOL(gb, grid), grid) != p) { if (p == iam) { rcv_luind[ind_loc] = send_2[k]; ind_loc++; xsub_n[LBi( gb_n, grid )] ++; } else { snd_luind[ptrToSend[p]] = send_2[k]; ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++; } } } send_1[p] = FALSE; } } } } /* reset ptrToSnd to point to the beginning of the data for each processor (structure needed in MPI_Alltoallv) */ for (i = 0, p = 0; p < nprocs; p++) { ptrToSend[p] = i; i += nnzToSend[p]; } /* ------------------------------------------------------------ PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION. Note: it uses MPI_Alltoallv. 
------------------------------------------------------------*/ if (nprocs > 1) { #if defined (_LONGINT) nnzToSend[iam] = 0; for (p=0; p INT_MAX || ptrToSend[p] > INT_MAX || nnzToRecv[p] > INT_MAX || ptrToRecv[p] > INT_MAX) ABORT("ERROR in dist_symbLU size to send > INT_MAX\n"); intBuf1[p] = (int) nnzToSend[p]; intBuf2[p] = (int) ptrToSend[p]; intBuf3[p] = (int) nnzToRecv[p]; intBuf4[p] = (int) ptrToRecv[p]; } #else /* Default */ intBuf1 = nnzToSend; intBuf2 = ptrToSend; intBuf3 = nnzToRecv; intBuf4 = ptrToRecv; #endif MPI_Alltoallv (snd_luind, intBuf1, intBuf2, mpi_int_t, rcv_luind, intBuf3, intBuf4, mpi_int_t, grid->comm); } if (sendL) nnzToRecv[iam] = nnz_loc_l; else nnzToRecv[iam] = nnz_loc_u; /* ------------------------------------------------------------ DEALLOCATE TEMPORARY STORAGE. -------------------------------------------------------------*/ if (sendU) if ( nprocs > 1 && (SendCnt_l || SendCnt_u) ) { SUPERLU_FREE (snd_luind); memAux -= (float) SUPERLU_MAX(SendCnt_l, SendCnt_u) * iword; } /* ------------------------------------------------------------ CONVERT THE FORMAT. 
------------------------------------------------------------*/ /* Initialize the array of column of L/ row of U pointers */ k = 0; for (p = 0; p < nprocs; p ++) { if (p != iam) { i = k; while (i < k + nnzToRecv[p]) { gb = rcv_luind[i]; nelts = rcv_luind[i+1]; if (sendL) xsub_n[LBj( gb, grid )] = nelts; else xsub_n[LBi( gb, grid )] = nelts; i += nelts + 2; } } k += nnzToRecv[p]; } if (sendL) j = nsupers_j; else j = nsupers_i; k = 0; isize = xsub_n[0]; xsub_n[0] = 0; for (gb_l = 1; gb_l < j; gb_l++) { k += isize; isize = xsub_n[gb_l]; xsub_n[gb_l] = k; } xsub_n[gb_l] = k + isize; nnz_loc = xsub_n[gb_l]; if (sendL) { lsub_n = NULL; if (nnz_loc) { if ( !(lsub_n = intMalloc_dist(nnz_loc)) ) { fprintf (stderr, "Malloc fails for lsub_n[]."); return (memAux + memRet); } memRet += (float) (nnz_loc * iword); } sub_n = lsub_n; } if (sendU) { usub_n = NULL; if (nnz_loc) { if ( !(usub_n = intMalloc_dist(nnz_loc)) ) { fprintf (stderr, "Malloc fails for usub_n[]."); return (memAux + memRet); } memRet += (float) (nnz_loc * iword); } sub_n = usub_n; } /* Copy the data into the L column / U row oriented storage */ k = 0; for (p = 0; p < nprocs; p++) { i = k; while (i < k + nnzToRecv[p]) { gb = rcv_luind[i]; if (gb >= nsupers) printf ("Pe[%d] p %d gb " IFMT " nsupers " IFMT " i " IFMT " i-k " IFMT "\n", iam, p, gb, nsupers, i, i-k); i += 2; if (sendL) gb_l = LBj( gb, grid ); if (sendU) gb_l = LBi( gb, grid ); for (j = xsub_n[gb_l]; j < xsub_n[gb_l+1]; i++, j++) { sub_n[j] = rcv_luind[i]; } } k += nnzToRecv[p]; } if (sendL) { sendL = FALSE; sendU = TRUE; } else sendU = FALSE; } /* deallocate memory allocated during symbolic factorization routine */ if (rcv_luind != NULL) { SUPERLU_FREE (rcv_luind); memAux -= (float) SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u) * iword; } SUPERLU_FREE (mem); memAux -= (float) (12 * nprocs * iword); SUPERLU_FREE(nvtcs); memAux -= (float) (5 * nprocs * sizeof(int)); if (xlsub_s != NULL) { SUPERLU_FREE (xlsub_s); SUPERLU_FREE (lsub_s); } if 
(xusub_s != NULL) { SUPERLU_FREE (xusub_s); SUPERLU_FREE (usub_s); } SUPERLU_FREE (globToLoc); if (supno_s != NULL) { SUPERLU_FREE (xsup_beg_s); SUPERLU_FREE (xsup_end_s); SUPERLU_FREE (supno_s); } Glu_persist->supno = supno_n; Glu_persist->xsup = xsup_n; *p_xlsub = xlsub_n; *p_lsub = lsub_n; *p_xusub = xusub_n; *p_usub = usub_n; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit dist_symbLU()"); #endif return (-memRet); } /*! \brief * *
 * Purpose
 * =======
 *   Re-distribute A on the 2D process mesh.  The lower part is
 *   stored using a column format and the upper part
 *   is stored using a row format.
 * 
 * Arguments
 * =========
 * 
 * A      (Input) SuperMatrix*
 *	  The distributed input matrix A of dimension (A->nrow, A->ncol).
 *        The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE.
 *
 * ScalePermstruct (Input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * Glu_persist  (Input) Glu_persist_t *
 *        Information on supernodes mapping.
 * 
 * grid   (Input) gridinfo_t*
 *        The 2D process mesh.
 *
 * p_ainf_colptr (Output) int_t**
 *         Pointer to the lower part of A distributed on a 2D grid 
 *         of processors, stored by columns.
 *
 * p_ainf_rowind (Output) int_t**
 *         Structure of the lower part of A distributed on a 
 *         2D grid of processors, stored by columns.
 *
 * p_ainf_val    (Output) doublecomplex**
 *         Numerical values of the lower part of A, distributed on a 
 *         2D grid of processors, stored by columns.
 *
 * p_asup_rowptr (Output) int_t**
 *         Pointer to the upper part of A distributed on a 2D grid 
 *         of processors, stored by rows.
 *
 * p_asup_colind (Output) int_t**
 *         Structure of the upper part of A distributed on a 
 *         2D grid of processors, stored by rows.
 *
 * p_asup_val    (Output) doublecomplex**
 *         Numerical values of the upper part of A, distributed on a 
 *         2D grid of processors, stored by rows.
 *
 * ilsum_i  (Input) int_t *
 *       Starting position of each supernode in 
 *       the full array (local, block row wise).
 *
 * ilsum_j  (Input) int_t *
 *       Starting position of each supernode in 
 *       the full array (local, block column wise).
 *
 * Return value
 * ============
 *   < 0, the negative of the number of bytes allocated by zdist_A that
 *        remain allocated on return (the output arrays).
 *   > 0, number of bytes allocated when out of memory
 *        (an approximation).
 * 
*/
static float
zdist_A(SuperMatrix *A, ScalePermstruct_t *ScalePermstruct,
        Glu_persist_t *Glu_persist, gridinfo_t *grid,
        int_t **p_ainf_colptr, int_t **p_ainf_rowind,
        doublecomplex **p_ainf_val,
        int_t **p_asup_rowptr, int_t **p_asup_colind,
        doublecomplex **p_asup_val,
        int_t *ilsum_i, int_t *ilsum_j
        )
{
    /*
     * Overview of the steps performed below:
     *   1. Count, per destination process, the nonzeros of the locally
     *      stored rows of A (first pass) and exchange the counts.
     *   2. Pack the remote entries into send buffers and record the local
     *      ones as (ia, ja, aij) triplets (second pass).
     *   3. Exchange the triplets with MPI_Isend / MPI_Recv.
     *   4. Convert the triplets into two compressed structures: entries
     *      with block row >= block column go to (ainf_colptr, ainf_rowind,
     *      ainf_val), the others to (asup_rowptr, asup_colind, asup_val).
     *
     * NOTE(review): the allocation-failure returns below do not release
     * buffers that were allocated earlier in this routine.
     */
    int    iam, p, procs;
    NRformat_loc *Astore;
    int_t  *perm_r; /* row permutation vector */
    int_t  *perm_c; /* column permutation vector */
    int_t  i, it, irow, fst_row, j, jcol, k, gbi, gbj, n, m_loc, jsize, isize;
    int_t  nsupers, nsupers_i, nsupers_j;
    int_t  nnz_loc, nnz_loc_ainf, nnz_loc_asup; /* number of local nonzeros */
    int_t  SendCnt; /* number of remote nonzeros to be sent */
    int_t  RecvCnt; /* number of remote nonzeros to be received */
    int_t  *ainf_colptr, *ainf_rowind, *asup_rowptr, *asup_colind;
    doublecomplex *asup_val, *ainf_val;
    int_t  *nnzToSend, *nnzToRecv, maxnnzToRecv;
    int_t  *ia, *ja, **ia_send, *index, *itemp;
    int_t  *ptr_to_send;
    doublecomplex *aij, **aij_send, *nzval, *dtemp;
    doublecomplex *nzval_a;
    MPI_Request *send_req;
    MPI_Status  status;
    /* xsup and supno are referenced by the BlockNum / FstBlockC macros. */
    int_t *xsup = Glu_persist->xsup;    /* supernode and column mapping */
    int_t *supno = Glu_persist->supno;
    float memAux;  /* Memory used during this routine and freed on return */
    float memRet;  /* Memory allocated and not freed on return */
    int_t iword, dword, szbuf;

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    iam = grid->iam;
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter zdist_A()");
#endif
    iword = sizeof(int_t);
    /* The value buffers hold doublecomplex entries, so account for the
       full size of a complex number (not sizeof(double)). */
    dword = sizeof(doublecomplex);

    perm_r = ScalePermstruct->perm_r;
    perm_c = ScalePermstruct->perm_c;
    procs = grid->nprow * grid->npcol;
    Astore = (NRformat_loc *) A->Store;
    n = A->ncol;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;

    /* One allocation holds both count arrays: nnzToRecv[0..procs-1]
       followed by nnzToSend[0..procs-1]. */
    if ( !(nnzToRecv = intCalloc_dist(2*procs)) ) {
        fprintf (stderr, "Malloc fails for nnzToRecv[].");
        return (ERROR_RET);
    }
    memAux = (float) (2 * procs * iword);
    memRet = 0.;
    nnzToSend = nnzToRecv + procs;
    nsupers  = supno[n-1] + 1;

    /* ------------------------------------------------------------
       COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS,
       THEN ALLOCATE SPACE.
       THIS ACCOUNTS FOR THE FIRST PASS OF A.
       ------------------------------------------------------------*/
    for (i = 0; i < m_loc; ++i) {
        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {
            irow = perm_c[perm_r[i+fst_row]];  /* Row number in Pc*Pr*A */
            jcol = Astore->colind[j];
            gbi = BlockNum( irow );
            gbj = BlockNum( jcol );
            p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid );
            ++nnzToSend[p];
        }
    }

    /* All-to-all communication */
    MPI_Alltoall( nnzToSend, 1, mpi_int_t, nnzToRecv, 1, mpi_int_t,
                  grid->comm);

    maxnnzToRecv = 0;
    nnz_loc = SendCnt = RecvCnt = 0;

    for (p = 0; p < procs; ++p) {
        if ( p != iam ) {
            SendCnt += nnzToSend[p];
            RecvCnt += nnzToRecv[p];
            maxnnzToRecv = SUPERLU_MAX( nnzToRecv[p], maxnnzToRecv );
        } else {
            nnz_loc += nnzToRecv[p];
            /*assert(nnzToSend[p] == nnzToRecv[p]);*/
        }
    }
    k = nnz_loc + RecvCnt; /* Total nonzeros ended up in my process. */
    szbuf = k;

    /* Allocate space for storing the triplets after redistribution.
       ia[] and ja[] share one buffer: ia is the first k entries. */
    if ( !(ia = intMalloc_dist(2*k)) ) {
        fprintf (stderr, "Malloc fails for ia[].");
        return (memAux);
    }
    memAux += (float) (2*k*iword);
    ja = ia + k;
    if ( !(aij = doublecomplexMalloc_dist(k)) ) {
        fprintf (stderr, "Malloc fails for aij[].");
        return (memAux);
    }
    memAux += (float) (k*dword);

    /* Allocate temporary storage for sending/receiving the A triplets. */
    if ( procs > 1 ) {
        if ( !(send_req = (MPI_Request *)
               SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))) ) {
            fprintf (stderr, "Malloc fails for send_req[].");
            return (memAux);
        }
        memAux += (float) (2*procs *sizeof(MPI_Request));
        if ( !(ia_send = (int_t **) SUPERLU_MALLOC(procs*sizeof(int_t*))) ) {
            fprintf(stderr, "Malloc fails for ia_send[].");
            return (memAux);
        }
        memAux += (float) (procs*sizeof(int_t*));
        if ( !(aij_send = (doublecomplex **)
               SUPERLU_MALLOC(procs*sizeof(doublecomplex*))) ) {
            fprintf(stderr, "Malloc fails for aij_send[].");
            return (memAux);
        }
        memAux += (float) (procs*sizeof(doublecomplex*));
        if ( !(index = intMalloc_dist(2*SendCnt)) ) {
            fprintf(stderr, "Malloc fails for index[].");
            return (memAux);
        }
        memAux += (float) (2*SendCnt*iword);
        if ( !(nzval = doublecomplexMalloc_dist(SendCnt)) ) {
            fprintf(stderr, "Malloc fails for nzval[].");
            return (memAux);
        }
        memAux += (float) (SendCnt * dword);
        if ( !(ptr_to_send = intCalloc_dist(procs)) ) {
            fprintf(stderr, "Malloc fails for ptr_to_send[].");
            return (memAux);
        }
        memAux += (float) (procs * iword);
        if ( !(itemp = intMalloc_dist(2*maxnnzToRecv)) ) {
            fprintf(stderr, "Malloc fails for itemp[].");
            return (memAux);
        }
        memAux += (float) (2*maxnnzToRecv*iword);
        if ( !(dtemp = doublecomplexMalloc_dist(maxnnzToRecv)) ) {
            fprintf(stderr, "Malloc fails for dtemp[].");
            return (memAux);
        }
        memAux += (float) (maxnnzToRecv * dword);

        /* Carve index[] / nzval[] into one slice per destination. */
        for (i = 0, j = 0, p = 0; p < procs; ++p) {
            if ( p != iam ) {
                ia_send[p] = &index[i];
                /* Each slice holds nnzToSend[p] row indices followed by
                   nnzToSend[p] column indices. */
                i += 2 * nnzToSend[p];
                aij_send[p] = &nzval[j];
                j += nnzToSend[p];
            }
        }
    } /* if procs > 1 */

    nsupers_i = CEILING( nsupers, grid->nprow ); /* Number of local block rows */
    nsupers_j = CEILING( nsupers, grid->npcol ); /* Number of local block columns */
    if ( !(ainf_colptr = intCalloc_dist(ilsum_j[nsupers_j] + 1)) ) {
        fprintf (stderr, "Malloc fails for *ainf_colptr[].");
        return (memAux);
    }
    memRet += (float) (ilsum_j[nsupers_j] + 1) * iword;
    if ( !(asup_rowptr = intCalloc_dist(ilsum_i[nsupers_i] + 1)) ) {
        fprintf (stderr, "Malloc fails for *asup_rowptr[].");
        return (memAux+memRet);
    }
    memRet += (float) (ilsum_i[nsupers_i] + 1) * iword;

    /* ------------------------------------------------------------
       LOAD THE ENTRIES OF A INTO THE (IA,JA,AIJ) STRUCTURES TO SEND.
       THIS ACCOUNTS FOR THE SECOND PASS OF A.
       ------------------------------------------------------------*/
    nnz_loc = 0; /* Reset the local nonzero count. */
    nnz_loc_ainf = nnz_loc_asup = 0;
    nzval_a = Astore->nzval;
    for (i = 0; i < m_loc; ++i) {
        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {
            irow = perm_c[perm_r[i+fst_row]];  /* Row number in Pc*Pr*A */
            jcol = Astore->colind[j];
            gbi = BlockNum( irow );
            gbj = BlockNum( jcol );
            p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid );

            if ( p != iam ) { /* remote */
                k = ptr_to_send[p];
                ia_send[p][k] = irow;
                ia_send[p][k + nnzToSend[p]] = jcol;
                aij_send[p][k] = nzval_a[j];
                ++ptr_to_send[p];
            } else {          /* local */
                ia[nnz_loc] = irow;
                ja[nnz_loc] = jcol;
                aij[nnz_loc] = nzval_a[j];
                ++nnz_loc;
                /* Count nonzeros in each column of L / row of U */
                if (gbi >= gbj) {
                    ainf_colptr[ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj )] ++;
                    nnz_loc_ainf ++;
                } else {
                    asup_rowptr[ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi )] ++;
                    nnz_loc_asup ++;
                }
            }
        }
    }

    /* ------------------------------------------------------------
       PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION.
       NOTE: Can possibly use MPI_Alltoallv.
       ------------------------------------------------------------*/
    for (p = 0; p < procs; ++p) {
        if ( p != iam ) {
            it = 2*nnzToSend[p];
            MPI_Isend( ia_send[p], it, mpi_int_t,
                       p, iam, grid->comm, &send_req[p] );
            it = nnzToSend[p];
            MPI_Isend( aij_send[p], it, SuperLU_MPI_DOUBLE_COMPLEX,
                       p, iam+procs, grid->comm, &send_req[procs+p] );
        }
    }

    for (p = 0; p < procs; ++p) {
        if ( p != iam ) {
            it = 2*nnzToRecv[p];
            MPI_Recv( itemp, it, mpi_int_t, p, p, grid->comm, &status );
            it = nnzToRecv[p];
            MPI_Recv( dtemp, it, SuperLU_MPI_DOUBLE_COMPLEX, p, p+procs,
                      grid->comm, &status );
            /* itemp[] holds nnzToRecv[p] row indices followed by the
               matching nnzToRecv[p] column indices. */
            for (i = 0; i < nnzToRecv[p]; ++i) {
                ia[nnz_loc] = itemp[i];
                irow = itemp[i];
                jcol = itemp[i + nnzToRecv[p]];
                /* assert(jcol<n); */
                ja[nnz_loc] = jcol;
                aij[nnz_loc] = dtemp[i];
                ++nnz_loc;

                gbi = BlockNum( irow );
                gbj = BlockNum( jcol );
                /* Count nonzeros in each column of L / row of U */
                if (gbi >= gbj) {
                    ainf_colptr[ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj )] ++;
                    nnz_loc_ainf ++;
                } else {
                    asup_rowptr[ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi )] ++;
                    nnz_loc_asup ++;
                }
            }
        }
    }

    /* The send buffers may only be released once every send completed. */
    for (p = 0; p < procs; ++p) {
        if ( p != iam ) {
            MPI_Wait( &send_req[p], &status);
            MPI_Wait( &send_req[procs+p], &status);
        }
    }

    /* ------------------------------------------------------------
       DEALLOCATE TEMPORARY STORAGE
       ------------------------------------------------------------*/
    SUPERLU_FREE(nnzToRecv);   /* also releases nnzToSend (same buffer) */
    memAux -= 2 * procs * iword;
    if ( procs > 1 ) {
        SUPERLU_FREE(send_req);
        SUPERLU_FREE(ia_send);
        SUPERLU_FREE(aij_send);
        SUPERLU_FREE(index);
        SUPERLU_FREE(nzval);
        SUPERLU_FREE(ptr_to_send);
        SUPERLU_FREE(itemp);
        SUPERLU_FREE(dtemp);
        memAux -= 2*procs *sizeof(MPI_Request) + procs*sizeof(int_t*) +
            procs*sizeof(doublecomplex*) + 2*SendCnt * iword +
            SendCnt* dword + procs*iword +
            2*maxnnzToRecv*iword + maxnnzToRecv*dword;
    }

    /* ------------------------------------------------------------
       CONVERT THE TRIPLET FORMAT.
       ------------------------------------------------------------*/
    if (nnz_loc_ainf != 0) {
        if ( !(ainf_rowind = intMalloc_dist(nnz_loc_ainf)) ) {
            fprintf (stderr, "Malloc fails for *ainf_rowind[].");
            return (memAux+memRet);
        }
        memRet += (float) (nnz_loc_ainf * iword);
        if ( !(ainf_val = doublecomplexMalloc_dist(nnz_loc_ainf)) ) {
            fprintf (stderr, "Malloc fails for *ainf_val[].");
            return (memAux+memRet);
        }
        memRet += (float) (nnz_loc_ainf * dword);
    } else {
        ainf_rowind = NULL;
        ainf_val = NULL;
    }
    if (nnz_loc_asup != 0) {
        if ( !(asup_colind = intMalloc_dist(nnz_loc_asup)) ) {
            fprintf (stderr, "Malloc fails for *asup_colind[].");
            return (memAux + memRet);
        }
        memRet += (float) (nnz_loc_asup * iword);
        if ( !(asup_val = doublecomplexMalloc_dist(nnz_loc_asup)) ) {
            fprintf (stderr, "Malloc fails for *asup_val[].");
            return (memAux + memRet);
        }
        memRet += (float) (nnz_loc_asup * dword);
    } else {
        asup_colind = NULL;
        asup_val = NULL;
    }

    /* Initialize the array of column pointers: turn the per-column
       counts into starting offsets (exclusive prefix sum). */
    k = 0;
    jsize = ainf_colptr[0];
    ainf_colptr[0] = 0;
    for (j = 1; j < ilsum_j[nsupers_j]; j++) {
        k += jsize;
        jsize = ainf_colptr[j];
        ainf_colptr[j] = k;
    }
    ainf_colptr[ilsum_j[nsupers_j]] = k + jsize;

    /* Same conversion for the row pointers of the upper part. */
    i = 0;
    isize = asup_rowptr[0];
    asup_rowptr[0] = 0;
    for (j = 1; j < ilsum_i[nsupers_i]; j++) {
        i += isize;
        isize = asup_rowptr[j];
        asup_rowptr[j] = i;
    }
    asup_rowptr[ilsum_i[nsupers_i]] = i + isize;

    /* Copy the triplets into the column oriented storage */
    for (i = 0; i < nnz_loc; ++i) {
        jcol = ja[i];
        irow = ia[i];
        gbi = BlockNum( irow );
        gbj = BlockNum( jcol );
        /* Place the entry in the next free slot of its column of the
           lower part / row of the upper part; the pointer is advanced
           and restored after this loop. */
        if (gbi >= gbj) {
            j = ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj );
            k = ainf_colptr[j];
            ainf_rowind[k] = irow;
            ainf_val[k] = aij[i];
            ainf_colptr[j] ++;
        } else {
            j = ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi );
            k = asup_rowptr[j];
            asup_colind[k] = jcol;
            asup_val[k] = aij[i];
            asup_rowptr[j] ++;
        }
    }

    /* Reset the column pointers to the beginning of each column */
    for (j = ilsum_j[nsupers_j]; j > 0; j--)
        ainf_colptr[j] = ainf_colptr[j-1];
    for (j = ilsum_i[nsupers_i]; j > 0; j--)
        asup_rowptr[j] = asup_rowptr[j-1];
    ainf_colptr[0] = 0;
    asup_rowptr[0] = 0;

    SUPERLU_FREE(ia);   /* also releases ja (same buffer) */
    SUPERLU_FREE(aij);
    memAux -= 2*szbuf*iword + szbuf*dword;

    *p_ainf_colptr = ainf_colptr;
    *p_ainf_rowind = ainf_rowind;
    *p_ainf_val    = ainf_val;
    *p_asup_rowptr = asup_rowptr;
    *p_asup_colind = asup_colind;
    *p_asup_val    = asup_val;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit zdist_A()");
    fprintf (stdout, "Size of allocated memory (MB) %.3f\n", memRet*1e-6);
#endif

    return (-memRet);
} /* zdist_A */

/*! \brief
 *
 *
 * Purpose
 * =======
 *   Distribute the input matrix onto the 2D process mesh.
 * 
 * Arguments
 * =========
 * 
 * fact (input) fact_t
 *        Specifies whether or not the L and U structures will be re-used.
 *        = SamePattern_SameRowPerm: L and U structures are input, and
 *                                   unchanged on exit.
 *          This routine should not be called for this case, an error
 *          is generated.  Instead, pddistribute routine should be called.
 *        = DOFACT or SamePattern: L and U structures are computed and output.
 *
 * n      (Input) int_t
 *        Dimension of the matrix.
 *
 * A      (Input) SuperMatrix*
 *	  The distributed input matrix A of dimension (A->nrow, A->ncol).
 *        A may be overwritten by diag(R)*A*diag(C)*Pc^T.
 *        The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE.
 *
 * ScalePermstruct (Input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * Pslu_freeable (Input) Pslu_freeable_t*
 *        The structure describing the graph of L and U
 *        (passed on to dist_symbLU).
 * 
 * LUstruct (Input) LUstruct_t*
 *        Data structures for L and U factors.
 *
 * grid   (Input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Return value
 * ============
 *   < 0, number of bytes allocated on return from the dist_symbLU
 *   > 0, number of bytes allocated for performing the distribution
 *       of the data, when out of memory.
 *        (an approximation).
 * 
*/ float zdist_psymbtonum(fact_t fact, int_t n, SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, Pslu_freeable_t *Pslu_freeable, LUstruct_t *LUstruct, gridinfo_t *grid) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; Glu_freeable_t Glu_freeable_n; LocalLU_t *Llu = LUstruct->Llu; int_t bnnz, fsupc, i, irow, istart, j, jb, jj, k, len, len1, nsupc, nsupc_gb, ii, nprocs; int_t ljb; /* local block column number */ int_t nrbl; /* number of L blocks in current block column */ int_t nrbu; /* number of U blocks in current block column */ int_t gb; /* global block number; 0 < gb <= nsuper */ int_t lb; /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */ int iam, jbrow, jbcol, jcol, kcol, mycol, myrow, pc, pr, ljb_i, ljb_j, p; int_t mybufmax[NBUFFERS]; NRformat_loc *Astore; doublecomplex *a; int_t *asub, *xa; int_t *ainf_colptr, *ainf_rowind, *asup_rowptr, *asup_colind; doublecomplex *asup_val, *ainf_val; int_t *xsup, *supno; /* supernode and column mapping */ int_t *lsub, *xlsub, *usub, *xusub; int_t nsupers, nsupers_i, nsupers_j, nsupers_ij; int_t next_ind; /* next available position in index[*] */ int_t next_val; /* next available position in nzval[*] */ int_t *index; /* indices consist of headers and row subscripts */ int *index1; /* temporary pointer to array of int */ doublecomplex *lusup, *uval; /* nonzero values in L and U */ int_t *recvBuf; int *ptrToRecv, *nnzToRecv, *ptrToSend, *nnzToSend; doublecomplex **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc) */ int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */ doublecomplex **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */ int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */ /*-- Counts to be used in factorization. --*/ int *ToRecv, *ToSendD, **ToSendR; /*-- Counts to be used in lower triangular solve. --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist; /* Column process list to send down Xk. */ int_t nfrecvx = 0; /* Number of Xk I will receive. 
*/ int_t nfsendx = 0; /* Number of Xk I will send */ int_t kseen; /*-- Counts to be used in upper triangular solve. --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist; /* Column process list to send down Xk. */ int_t nbrecvx = 0; /* Number of Xk I will receive. */ int_t nbsendx = 0; /* Number of Xk I will send */ int_t *ilsum; /* starting position of each supernode in the full array (local) */ int_t *ilsum_j, ldaspa_j; /* starting position of each supernode in the full array (local, block column wise) */ /*-- Auxiliary arrays; freed on return --*/ int_t *Urb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ int_t *LUb_length; /* L,U block length; size nsupers_ij */ int_t *LUb_indptr; /* pointers to L,U index[]; size nsupers_ij */ int_t *LUb_number; /* global block number; size nsupers_ij */ int_t *LUb_valptr; /* pointers to U nzval[]; size ceil(NSUPERS/Pc) */ int_t *Lrb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ doublecomplex *dense, *dense_col; /* SPA */ doublecomplex zero = {0.0, 0.0}; int_t ldaspa; /* LDA of SPA */ int_t iword, dword; float memStrLU, memA, memDist = 0.; /* memory used for redistributing the data, which does not include the memory for the numerical values of L and U (positive number)*/ float memNLU = 0.; /* memory allocated for storing the numerical values of L and U, that will be used in the numeric factorization (positive number) */ #if ( PRNTlevel>=1 ) int_t nLblocks = 0, nUblocks = 0; #endif /* Initialization. 
*/ iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dist_psymbtonum()"); #endif myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nprocs = grid->npcol * grid->nprow; for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0; Astore = (NRformat_loc *) A->Store; iword = sizeof(int_t); dword = sizeof(doublecomplex); if (fact == SamePattern_SameRowPerm) { ABORT ("ERROR: call of dist_psymbtonum with fact equals SamePattern_SameRowPerm."); } if ((memStrLU = dist_symbLU (n, Pslu_freeable, Glu_persist, &xlsub, &lsub, &xusub, &usub, grid)) > 0) return (memStrLU); memDist += (-memStrLU); xsup = Glu_persist->xsup; /* supernode and column mapping */ supno = Glu_persist->supno; nsupers = supno[n-1] + 1; nsupers_i = CEILING( nsupers, grid->nprow );/* No of local row blocks */ nsupers_j = CEILING( nsupers, grid->npcol );/* No of local column blocks */ nsupers_ij = SUPERLU_MAX(nsupers_i, nsupers_j); if ( !(ilsum = intMalloc_dist(nsupers_i+1)) ) { fprintf (stderr, "Malloc fails for ilsum[]."); return (memDist + memNLU); } memNLU += (nsupers_i+1) * iword; if ( !(ilsum_j = intMalloc_dist(nsupers_j+1)) ) { fprintf (stderr, "Malloc fails for ilsum_j[]."); return (memDist + memNLU); } memDist += (nsupers_j+1) * iword; /* Compute ldaspa and ilsum[], ldaspa_j and ilsum_j[]. 
*/ ilsum[0] = 0; ldaspa = 0; for (gb = 0; gb < nsupers; gb++) if ( myrow == PROW( gb, grid ) ) { i = SuperSize( gb ); ldaspa += i; lb = LBi( gb, grid ); ilsum[lb + 1] = ilsum[lb] + i; } ilsum[nsupers_i] = ldaspa; ldaspa_j = 0; ilsum_j[0] = 0; for (gb = 0; gb < nsupers; gb++) if (mycol == PCOL( gb, grid )) { i = SuperSize( gb ); ldaspa_j += i; lb = LBj( gb, grid ); ilsum_j[lb + 1] = ilsum_j[lb] + i; } ilsum_j[nsupers_j] = ldaspa_j; if ((memA = zdist_A(A, ScalePermstruct, Glu_persist, grid, &ainf_colptr, &ainf_rowind, &ainf_val, &asup_rowptr, &asup_colind, &asup_val, ilsum, ilsum_j)) > 0) return (memDist + memA + memNLU); memDist += (-memA); /* ------------------------------------------------------------ FIRST TIME CREATING THE L AND U DATA STRUCTURES. ------------------------------------------------------------*/ /* We first need to set up the L and U data structures and then * propagate the values of A into them. */ if ( !(ToRecv = SUPERLU_MALLOC(nsupers * sizeof(int))) ) { fprintf(stderr, "Calloc fails for ToRecv[]."); return (memDist + memNLU); } for (i = 0; i < nsupers; ++i) ToRecv[i] = 0; memNLU += nsupers * iword; k = CEILING( nsupers, grid->npcol ); /* Number of local column blocks */ if ( !(ToSendR = (int **) SUPERLU_MALLOC(k*sizeof(int*))) ) { fprintf(stderr, "Malloc fails for ToSendR[]."); return (memDist + memNLU); } memNLU += k*sizeof(int_t*); j = k * grid->npcol; if ( !(index1 = SUPERLU_MALLOC(j * sizeof(int))) ) { fprintf(stderr, "Malloc fails for index[]."); return (memDist + memNLU); } memNLU += j*iword; for (i = 0; i < j; ++i) index1[i] = EMPTY; for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index1[j]; /* Auxiliary arrays used to set up L and U block data structures. They are freed on return. 
*/ if ( !(LUb_length = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_length[]."); return (memDist + memNLU); } if ( !(LUb_indptr = intMalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Malloc fails for LUb_indptr[]."); return (memDist + memNLU); } if ( !(LUb_number = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_number[]."); return (memDist + memNLU); } if ( !(LUb_valptr = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_valptr[]."); return (memDist + memNLU); } memDist += 4 * nsupers_ij * iword; k = CEILING( nsupers, grid->nprow ); /* Pointers to the beginning of each block row of U. */ if ( !(Unzval_br_ptr = (doublecomplex**)SUPERLU_MALLOC(nsupers_i * sizeof(doublecomplex*))) ) { fprintf(stderr, "Malloc fails for Unzval_br_ptr[]."); return (memDist + memNLU); } if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(nsupers_i * sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for Ufstnz_br_ptr[]."); return (memDist + memNLU); } memNLU += nsupers_i*sizeof(doublecomplex*) + nsupers_i*sizeof(int_t*); Unzval_br_ptr[nsupers_i-1] = NULL; Ufstnz_br_ptr[nsupers_i-1] = NULL; if ( !(ToSendD = SUPERLU_MALLOC(nsupers_i * sizeof(int))) ) { fprintf(stderr, "Malloc fails for ToSendD[]."); return (memDist + memNLU); } for (i = 0; i < nsupers_i; ++i) ToSendD[i] = NO; memNLU += nsupers_i*iword; if ( !(Urb_marker = intCalloc_dist(nsupers_j))) { fprintf(stderr, "Calloc fails for rb_marker[]."); return (memDist + memNLU); } if ( !(Lrb_marker = intCalloc_dist( nsupers_i ))) { fprintf(stderr, "Calloc fails for rb_marker[]."); return (memDist + memNLU); } memDist += (nsupers_i + nsupers_j)*iword; /* Auxiliary arrays used to set up L, U block data structures. They are freed on return. k is the number of local row blocks. 
*/ if ( !(dense = doublecomplexCalloc_dist(SUPERLU_MAX(ldaspa, ldaspa_j) * sp_ienv_dist(3))) ) { fprintf(stderr, "Calloc fails for SPA dense[]."); return (memDist + memNLU); } /* These counts will be used for triangular solves. */ if ( !(fmod = intCalloc_dist(nsupers_i)) ) { fprintf(stderr, "Calloc fails for fmod[]."); return (memDist + memNLU); } if ( !(bmod = intCalloc_dist(nsupers_i)) ) { fprintf(stderr, "Calloc fails for bmod[]."); return (memDist + memNLU); } /* ------------------------------------------------ */ memNLU += 2*nsupers_i*iword + SUPERLU_MAX(ldaspa, ldaspa_j)*sp_ienv_dist(3)*dword; /* Pointers to the beginning of each block column of L. */ if ( !(Lnzval_bc_ptr = (doublecomplex**)SUPERLU_MALLOC(nsupers_j * sizeof(doublecomplex*))) ) { fprintf(stderr, "Malloc fails for Lnzval_bc_ptr[]."); return (memDist + memNLU); } if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(nsupers_j * sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for Lrowind_bc_ptr[]."); return (memDist + memNLU); } memNLU += nsupers_j * sizeof(doublecomplex*) + nsupers_j * sizeof(int_t*); Lnzval_bc_ptr[nsupers_j-1] = NULL; Lrowind_bc_ptr[nsupers_j-1] = NULL; /* These lists of processes will be used for triangular solves. 
*/ if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(nsupers_j*sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for fsendx_plist[]."); return (memDist + memNLU); } len = nsupers_j * grid->nprow; if ( !(index = intMalloc_dist(len)) ) { fprintf(stderr, "Malloc fails for fsendx_plist[0]"); return (memDist + memNLU); } for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < nsupers_j; ++i, j += grid->nprow) fsendx_plist[i] = &index[j]; if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(nsupers_j*sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for bsendx_plist[]."); return (memDist + memNLU); } if ( !(index = intMalloc_dist(len)) ) { fprintf(stderr, "Malloc fails for bsendx_plist[0]"); return (memDist + memNLU); } for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < nsupers_j; ++i, j += grid->nprow) bsendx_plist[i] = &index[j]; /* -------------------------------------------------------------- */ memNLU += 2*nsupers_j*sizeof(int_t*) + 2*len*iword; /*------------------------------------------------------------ PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U. ------------------------------------------------------------*/ for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); ljb_j = LBj( jb, grid ); /* Local block number column wise */ ljb_i = LBi( jb, grid); /* Local block number row wise */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); if ( myrow == jbrow ) { /* Block row jb in my process row */ /* Scatter A into SPA. 
*/ for (j = ilsum[ljb_i], dense_col = dense; j < ilsum[ljb_i]+nsupc; j++) { for (i = asup_rowptr[j]; i < asup_rowptr[j+1]; i++) { if (i >= asup_rowptr[ilsum[nsupers_i]]) printf ("ERR7\n"); jcol = asup_colind[i]; if (jcol >= n) printf ("Pe[%d] ERR distsn jb " IFMT " gb " IFMT " j " IFMT " jcol %d\n", iam, jb, gb, j, jcol); gb = BlockNum( jcol ); lb = LBj( gb, grid ); if (gb >= nsupers || lb >= nsupers_j) printf ("ERR8\n"); jcol = ilsum_j[lb] + jcol - FstBlockC( gb ); if (jcol >= ldaspa_j) printf ("Pe[%d] ERR1 jb " IFMT " gb " IFMT " j " IFMT " jcol %d\n", iam, jb, gb, j, jcol); dense_col[jcol] = asup_val[i]; } dense_col += ldaspa_j; } /*------------------------------------------------ * SET UP U BLOCKS. *------------------------------------------------*/ /* Count number of blocks and length of each block. */ nrbu = 0; len = 0; /* Number of column subscripts I own. */ len1 = 0; /* number of fstnz subscripts */ for (i = xusub[ljb_i]; i < xusub[ljb_i+1]; i++) { if (i >= xusub[nsupers_i]) printf ("ERR10\n"); jcol = usub[i]; gb = BlockNum( jcol ); /* Global block number */ /*if (fsupc <= 146445 && 146445 < fsupc + nsupc && jcol == 397986) printf ("Pe[%d] [%d %d] elt [%d] jbcol %d pc %d\n", iam, jb, gb, jcol, jbcol, pc); */ lb = LBj( gb, grid ); /* Local block number */ pc = PCOL( gb, grid ); /* Process col owning this block */ if (mycol == jbcol) ToSendR[ljb_j][pc] = YES; /* if (mycol == jbcol && mycol != pc) ToSendR[ljb_j][pc] = YES; */ pr = PROW( gb, grid ); if ( pr != jbrow && mycol == pc) bsendx_plist[lb][jbrow] = YES; if (mycol == pc) { len += nsupc; LUb_length[lb] += nsupc; ToSendD[ljb_i] = YES; if (Urb_marker[lb] <= jb) { /* First see this block */ if (Urb_marker[lb] == FALSE && gb != jb && myrow != pr) nbrecvx ++; Urb_marker[lb] = jb + 1; LUb_number[nrbu] = gb; /* if (gb == 391825 && jb == 145361) printf ("Pe[%d] T1 [%d %d] nrbu %d \n", iam, jb, gb, nrbu); */ nrbu ++; len1 += SuperSize( gb ); if ( gb != jb )/* Exclude diagonal block. */ ++bmod[ljb_i];/* Mod. 
count for back solve */ #if ( PRNTlevel>=1 ) ++nUblocks; #endif } } } /* for i ... */ if ( nrbu ) { /* Sort the blocks of U in increasing block column index. SuperLU_DIST assumes this is true */ /* simple insert sort algorithm */ /* to be transformed in quick sort */ for (j = 1; j < nrbu; j++) { k = LUb_number[j]; for (i=j-1; i>=0 && LUb_number[i] > k; i--) { LUb_number[i+1] = LUb_number[i]; } LUb_number[i+1] = k; } /* Set up the initial pointers for each block in index[] and nzval[]. */ /* Add room for descriptors */ len1 += BR_HEADER + nrbu * UB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1+1)) ) { fprintf (stderr, "Malloc fails for Uindex[]"); return (memDist + memNLU); } Ufstnz_br_ptr[ljb_i] = index; if (!(Unzval_br_ptr[ljb_i] = doublecomplexMalloc_dist(len))) { fprintf (stderr, "Malloc fails for Unzval_br_ptr[*][]"); return (memDist + memNLU); } memNLU += (len1+1)*iword + len*dword; uval = Unzval_br_ptr[ljb_i]; mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 ); mybufmax[3] = SUPERLU_MAX( mybufmax[3], len ); index[0] = nrbu; /* Number of column blocks */ index[1] = len; /* Total length of nzval[] */ index[2] = len1; /* Total length of index */ index[len1] = -1; /* End marker */ next_ind = BR_HEADER; next_val = 0; for (k = 0; k < nrbu; k++) { gb = LUb_number[k]; lb = LBj( gb, grid ); len = LUb_length[lb]; LUb_length[lb] = 0; /* Reset vector of block length */ index[next_ind++] = gb; /* Descriptor */ index[next_ind++] = len; LUb_indptr[lb] = next_ind; for (; next_ind < LUb_indptr[lb] + SuperSize( gb ); next_ind++) index[next_ind] = FstBlockC( jb + 1 ); LUb_valptr[lb] = next_val; next_val += len; } /* Propagate the fstnz subscripts to Ufstnz_br_ptr[], and the initial values of A from SPA into Unzval_br_ptr[]. 
*/ for (i = xusub[ljb_i]; i < xusub[ljb_i+1]; i++) { jcol = usub[i]; gb = BlockNum( jcol ); if ( mycol == PCOL( gb, grid ) ) { lb = LBj( gb, grid ); k = LUb_indptr[lb]; /* Start fstnz in index */ index[k + jcol - FstBlockC( gb )] = FstBlockC( jb ); } } /* for i ... */ for (i = 0; i < nrbu; i++) { gb = LUb_number[i]; lb = LBj( gb, grid ); next_ind = LUb_indptr[lb]; k = FstBlockC( jb + 1); jcol = ilsum_j[lb]; for (jj = 0; jj < SuperSize( gb ); jj++, jcol++) { dense_col = dense; j = index[next_ind+jj]; for (ii = j; ii < k; ii++) { uval[LUb_valptr[lb]++] = dense_col[jcol]; dense_col[jcol] = zero; dense_col += ldaspa_j; } } } } else { Ufstnz_br_ptr[ljb_i] = NULL; Unzval_br_ptr[ljb_i] = NULL; } /* if nrbu ... */ } /* if myrow == jbrow */ /*------------------------------------------------ * SET UP L BLOCKS. *------------------------------------------------*/ if (mycol == jbcol) { /* Block column jb in my process column */ /* Scatter A_inf into SPA. */ for (j = ilsum_j[ljb_j], dense_col = dense; j < ilsum_j[ljb_j] + nsupc; j++) { for (i = ainf_colptr[j]; i < ainf_colptr[j+1]; i++) { irow = ainf_rowind[i]; if (irow >= n) printf ("Pe[%d] ERR1\n", iam); gb = BlockNum( irow ); if (gb >= nsupers) printf ("Pe[%d] ERR5\n", iam); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); if (irow >= ldaspa) printf ("Pe[%d] ERR0\n", iam); dense_col[irow] = ainf_val[i]; } } dense_col += ldaspa; } /* sort the indices of the diagonal block at the beginning of xlsub */ if (myrow == jbrow) { k = xlsub[ljb_j]; for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; if (irow < nsupc + fsupc && i != k+irow-fsupc) { lsub[i] = lsub[k + irow - fsupc]; lsub[k + irow - fsupc] = irow; i --; } } } /* Count number of blocks and length of each block. */ nrbl = 0; len = 0; /* Number of row subscripts I own. 
*/ kseen = 0; for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; gb = BlockNum( irow ); /* Global block number */ pr = PROW( gb, grid ); /* Process row owning this block */ if ( pr != jbrow && fsendx_plist[ljb_j][pr] == EMPTY && myrow == jbrow) { fsendx_plist[ljb_j][pr] = YES; ++nfsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ if (Lrb_marker[lb] <= jb) { /* First see this block */ Lrb_marker[lb] = jb + 1; LUb_length[lb] = 1; LUb_number[nrbl++] = gb; if ( gb != jb ) /* Exclude diagonal block. */ ++fmod[lb]; /* Mod. count for forward solve */ if ( kseen == 0 && myrow != jbrow ) { ++nfrecvx; kseen = 1; } #if ( PRNTlevel>=1 ) ++nLblocks; #endif } else ++LUb_length[lb]; ++len; } } /* for i ... */ if ( nrbl ) { /* Do not ensure the blocks are sorted! */ /* Set up the initial pointers for each block in index[] and nzval[]. */ /* If I am the owner of the diagonal block, order it first in LUb_number. Necessary for SuperLU_DIST routines */ kseen = EMPTY; for (j = 0; j < nrbl; j++) { if (LUb_number[j] == jb) kseen = j; } if (kseen != EMPTY && kseen != 0) { LUb_number[kseen] = LUb_number[0]; LUb_number[0] = jb; } /* Add room for descriptors */ len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1)) ) { fprintf (stderr, "Malloc fails for index[]"); return (memDist + memNLU); } Lrowind_bc_ptr[ljb_j] = index; if (!(Lnzval_bc_ptr[ljb_j] = doublecomplexMalloc_dist(len*nsupc))) { fprintf(stderr, "Malloc fails for Lnzval_bc_ptr[*][] col block " IFMT, jb); return (memDist + memNLU); } memNLU += len1*iword + len*nsupc*dword; lusup = Lnzval_bc_ptr[ljb_j]; mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 ); mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc ); mybufmax[4] = SUPERLU_MAX( mybufmax[4], len ); index[0] = nrbl; /* Number of row blocks */ index[1] = len; /* LDA of the nzval[] */ next_ind = BC_HEADER; next_val = 0; for (k = 0; k < nrbl; ++k) { gb = LUb_number[k]; lb = LBi( gb, grid ); len = LUb_length[lb]; 
LUb_length[lb] = 0; index[next_ind++] = gb; /* Descriptor */ index[next_ind++] = len; LUb_indptr[lb] = next_ind; LUb_valptr[lb] = next_val; next_ind += len; next_val += len; } /* Propagate the compressed row subscripts to Lindex[], and the initial values of A from SPA into Lnzval[]. */ len = index[1]; /* LDA of lusup[] */ for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); k = LUb_indptr[lb]++; /* Random access a block */ index[k] = irow; k = LUb_valptr[lb]++; irow = ilsum[lb] + irow - FstBlockC( gb ); for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } } /* for i ... */ } else { Lrowind_bc_ptr[ljb_j] = NULL; Lnzval_bc_ptr[ljb_j] = NULL; } /* if nrbl ... */ } /* if mycol == pc */ } /* for jb ... */ SUPERLU_FREE(ilsum_j); SUPERLU_FREE(Urb_marker); SUPERLU_FREE(LUb_length); SUPERLU_FREE(LUb_indptr); SUPERLU_FREE(LUb_number); SUPERLU_FREE(LUb_valptr); SUPERLU_FREE(Lrb_marker); SUPERLU_FREE(dense); /* Free the memory used for storing L and U */ SUPERLU_FREE(xlsub); SUPERLU_FREE(xusub); if (lsub != NULL) SUPERLU_FREE(lsub); if (usub != NULL) SUPERLU_FREE(usub); /* Free the memory used for storing A */ SUPERLU_FREE(ainf_colptr); if (ainf_rowind != NULL) { SUPERLU_FREE(ainf_rowind); SUPERLU_FREE(ainf_val); } SUPERLU_FREE(asup_rowptr); if (asup_colind != NULL) { SUPERLU_FREE(asup_colind); SUPERLU_FREE(asup_val); } /* exchange information about bsendx_plist in between column of processors */ k = SUPERLU_MAX( grid->nprow, grid->npcol); if ( !(recvBuf = (int_t *) SUPERLU_MALLOC(nsupers*k*iword)) ) { fprintf (stderr, "Malloc fails for recvBuf[]."); return (memDist + memNLU); } if ( !(nnzToRecv = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for nnzToRecv[]."); return (memDist + memNLU); } if ( !(ptrToRecv = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf 
(stderr, "Malloc fails for ptrToRecv[]."); return (memDist + memNLU); } if ( !(nnzToSend = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for nnzToRecv[]."); return (memDist + memNLU); } if ( !(ptrToSend = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for ptrToRecv[]."); return (memDist + memNLU); } if (memDist < (nsupers*k*iword +4*nprocs * sizeof(int))) memDist = nsupers*k*iword +4*nprocs * sizeof(int); for (p = 0; p < nprocs; p++) nnzToRecv[p] = 0; for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); nnzToRecv[p] += grid->npcol; } i = 0; for (p = 0; p < nprocs; p++) { ptrToRecv[p] = i; i += nnzToRecv[p]; ptrToSend[p] = 0; if (p != iam) nnzToSend[p] = nnzToRecv[iam]; else nnzToSend[p] = 0; } nnzToRecv[iam] = 0; i = ptrToRecv[iam]; for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); if (p == iam) { ljb_j = LBj( jb, grid ); /* Local block number column wise */ for (j = 0; j < grid->npcol; j++, i++) recvBuf[i] = ToSendR[ljb_j][j]; } } MPI_Alltoallv (&(recvBuf[ptrToRecv[iam]]), nnzToSend, ptrToSend, mpi_int_t, recvBuf, nnzToRecv, ptrToRecv, mpi_int_t, grid->comm); for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); ljb_j = LBj( jb, grid ); /* Local block number column wise */ ljb_i = LBi( jb, grid ); /* Local block number row wise */ /* (myrow == jbrow) { if (ToSendD[ljb_i] == YES) ToRecv[jb] = 1; } else { if (recvBuf[ptrToRecv[p] + mycol] == YES) ToRecv[jb] = 2; } */ if (recvBuf[ptrToRecv[p] + mycol] == YES) { if (myrow == jbrow) ToRecv[jb] = 1; else ToRecv[jb] = 2; } if (mycol == jbcol) { for (i = 0, j = ptrToRecv[p]; i < grid->npcol; i++, j++) ToSendR[ljb_j][i] = recvBuf[j]; ToSendR[ljb_j][mycol] = EMPTY; } ptrToRecv[p] += grid->npcol; } /* exchange information about bsendx_plist in between column of 
processors */ MPI_Allreduce ((*bsendx_plist), recvBuf, nsupers_j * grid->nprow, mpi_int_t, MPI_MAX, grid->cscp.comm); for (jb = 0; jb < nsupers; jb ++) { jbcol = PCOL( jb, grid); jbrow = PROW( jb, grid); if (mycol == jbcol) { ljb_j = LBj( jb, grid ); /* Local block number column wise */ if (myrow == jbrow ) { for (k = ljb_j * grid->nprow; k < (ljb_j+1) * grid->nprow; k++) { (*bsendx_plist)[k] = recvBuf[k]; if ((*bsendx_plist)[k] != EMPTY) nbsendx ++; } } else { for (k = ljb_j * grid->nprow; k < (ljb_j+1) * grid->nprow; k++) (*bsendx_plist)[k] = EMPTY; } } } SUPERLU_FREE(nnzToRecv); SUPERLU_FREE(ptrToRecv); SUPERLU_FREE(nnzToSend); SUPERLU_FREE(ptrToSend); SUPERLU_FREE(recvBuf); Llu->Lrowind_bc_ptr = Lrowind_bc_ptr; Llu->Lnzval_bc_ptr = Lnzval_bc_ptr; Llu->Ufstnz_br_ptr = Ufstnz_br_ptr; Llu->Unzval_br_ptr = Unzval_br_ptr; Llu->ToRecv = ToRecv; Llu->ToSendD = ToSendD; Llu->ToSendR = ToSendR; Llu->fmod = fmod; Llu->fsendx_plist = fsendx_plist; Llu->nfrecvx = nfrecvx; Llu->nfsendx = nfsendx; Llu->bmod = bmod; Llu->bsendx_plist = bsendx_plist; Llu->nbrecvx = nbrecvx; Llu->nbsendx = nbsendx; Llu->ilsum = ilsum; Llu->ldalsum = ldaspa; LUstruct->Glu_persist = Glu_persist; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. # L blocks " IFMT "\t# U blocks " IFMT "\n", nLblocks, nUblocks); #endif k = CEILING( nsupers, grid->nprow );/* Number of local block rows */ if ( !(Llu->mod_bit = intMalloc_dist(k)) ) ABORT("Malloc fails for mod_bit[]."); /* Find the maximum buffer size. */ MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, MPI_MAX, grid->comm); #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ilsum, fmod, fsendx_plist, bmod, bsendx_plist, ToRecv, ToSendR, ToSendD, mod_bit */ CHECK_MALLOC(iam, "Exit dist_psymbtonum()"); #endif return (- (memDist+memNLU)); } /* zdist_psymbtonum */ SuperLU_DIST_5.3.0/SRC/util_dist.h0000644013363400111340000001007313233431301015435 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Header for utilities */ #ifndef __SUPERLU_UTIL /* allow multiple inclusions */ #define __SUPERLU_UTIL #include #include #include #include #include "superlu_enum_consts.h" /* * Macros */ #ifndef USER_ABORT #define USER_ABORT(msg) superlu_abort_and_exit_dist(msg) #endif #define ABORT(err_msg) \ { char msg[256];\ sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\ USER_ABORT(msg); } #ifndef USER_MALLOC #define USER_MALLOC(size) superlu_malloc_dist(size) #endif #define SUPERLU_MALLOC(size) USER_MALLOC(size) #ifndef USER_FREE #define USER_FREE(addr) superlu_free_dist(addr) #endif #define SUPERLU_FREE(addr) USER_FREE(addr) #define CHECK_MALLOC(pnum, where) { \ extern long int superlu_malloc_total; \ printf("(%d) %s: superlu_malloc_total (MB) %.6f\n", \ pnum, where, superlu_malloc_total*1e-6); \ } #define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) ) #define SUPERLU_MIN(x, y) ( (x) < (y) ? 
(x) : (y) ) /* * Constants */ #define EMPTY (-1) #ifndef FALSE #define FALSE (0) #endif #ifndef TRUE #define TRUE (1) #endif /* * Type definitions */ typedef float flops_t; typedef unsigned char Logical; /* #ifdef _CRAY #define int short #endif */ typedef struct { int *panel_histo; /* histogram of panel size distribution */ double *utime; /* running time at various phases */ flops_t *ops; /* operation count at various phases */ int TinyPivots; /* number of tiny pivots */ int RefineSteps; /* number of iterative refinement steps */ int num_look_aheads; /* number of look ahead */ /*-- new --*/ float current_buffer; /* bytes allocated for buffer in numerical factorization */ float peak_buffer; /* monitor the peak buffer size (bytes) */ float gpu_buffer; /* monitor the buffer allocated on GPU (bytes) */ } SuperLUStat_t; /* Headers for 2 types of dynamatically managed memory */ typedef struct e_node { int size; /* length of the memory that has been used */ void *mem; /* pointer to the new malloc'd store */ } ExpHeader; typedef struct { int size; int used; int top1; /* grow upward, relative to &array[0] */ int top2; /* grow downward */ void *array; } LU_stack_t; /* Constants */ #define GluIntArray(n) (5 * (n) + 5) #if 0 // defined in superlu_enum_consts.h -- 1/20/2018 #define NO_MEMTYPE 6 /* 0: lusup; 1: ucol; 2: lsub; 3: usub 4: llvl; level number in L for ILU(k) 5: ulvl; level number in U for ILU(k) */ #endif /* Macros to manipulate stack */ #define StackFull(x) ( x + stack.used >= stack.size ) #define NotDoubleAlign(addr) ( (long)addr & 7 ) #define DoubleAlign(addr) ( ((long)addr + 7) & ~7L ) #define TempSpace(n, w) ( (2*w + 4 + NO_MARKER)*m*sizeof(int) + \ (w + 1)*n*sizeof(double) ) #define Reduce(alpha) ((alpha + 1) / 2) /* i.e. 
(alpha-1)/2 + 1 */ #define FIRSTCOL_OF_SNODE(i) (xsup[i]) #if ( PROFlevel>=1 ) #define TIC(t) t = SuperLU_timer_() #define TOC(t2, t1) t2 = SuperLU_timer_() - t1 #else #define TIC(t) #define TOC(t2, t1) #endif /********************************************************* * Macros used for easy access of sparse matrix entries. * *********************************************************/ #define L_SUB_START(col) ( Lstore->rowind_colptr[col] ) #define L_SUB(ptr) ( Lstore->rowind[ptr] ) #define L_NZ_START(col) ( Lstore->nzval_colptr[col] ) #define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] ) #define U_NZ_START(col) ( Ustore->colptr[col] ) #define U_SUB(ptr) ( Ustore->rowind[ptr] ) #endif /* __SUPERLU_UTIL */ SuperLU_DIST_5.3.0/SRC/dmemory.patch0000644013363400111340000000045113233431301015760 0ustar xiaoyessg132c132 < buf = (double *) SUPERLU_MALLOC(n * sizeof(double)); --- > buf = (double *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(double) ); 141c141 < buf = (double *) SUPERLU_MALLOC(n * sizeof(double)); --- > buf = (double *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(double)); SuperLU_DIST_5.3.0/SRC/pdgstrf2.c0000644013363400111340000003371713233431301015175 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs panel LU factorization. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * August 15, 2014
 *
 * Modified:
 *   September 30, 2017
 *
 * 
 * Purpose
 * =======
 *   Panel factorization -- block column k
 *
 *   Factor diagonal and subdiagonal blocks and test for exact singularity.
 *   Only the column processes that own block column *k* participate
 *   in the work.
 *
 * Arguments
 * =========
 * options (input) superlu_dist_options_t* (global)
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *
 * k0     (input) int_t (global)
 *        Counter of the next supernode to be factorized.
 *
 * k      (input) int_t (global)
 *        The column number of the block column to be factorized.
 *
 * thresh (input) double (global)
 *        The threshold value = s_eps * anorm.
 *
 * Glu_persist (input) Glu_persist_t*
 *        Global data structures (xsup, supno) replicated on all processes.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Llu    (input/output) LocalLU_t*
 *        Local data structures to store distributed L and U matrices.
 *
 * U_diag_blk_send_req (input/output) MPI_Request*
 *        List of send requests to send down the diagonal block of U.
 *
 * tag_ub (input) int
 *        Upper bound of MPI tag values.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the factorization.
 *        See SuperLUStat_t structure defined in util_dist.h.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ #include #include "superlu_ddefs.h" /* This pdgstrf2 is based on TRSM function */ void pdgstrf2_trsm (superlu_dist_options_t * options, int_t k0, int_t k, double thresh, Glu_persist_t * Glu_persist, gridinfo_t * grid, LocalLU_t * Llu, MPI_Request * U_diag_blk_send_req, int tag_ub, SuperLUStat_t * stat, int *info) { /* printf("entering pdgstrf2 %d \n", grid->iam); */ int cols_left, iam, l, pkk, pr; int incx = 1, incy = 1; int nsupr; /* number of rows in the block (LDA) */ int nsupc; /* number of columns in the block */ int luptr; int_t i, myrow, krow, j, jfst, jlst, u_diag_cnt; int_t *xsup = Glu_persist->xsup; double *lusup, temp; double *ujrow, *ublk_ptr; /* pointer to the U block */ double alpha = -1, zero = 0.0; int_t Pr; MPI_Status status; MPI_Comm comm = (grid->cscp).comm; double t1, t2; /* Initialization. */ iam = grid->iam; Pr = grid->nprow; myrow = MYROW (iam, grid); krow = PROW (k, grid); pkk = PNUM (PROW (k, grid), PCOL (k, grid), grid); j = LBj (k, grid); /* Local block number */ jfst = FstBlockC (k); jlst = FstBlockC (k + 1); lusup = Llu->Lnzval_bc_ptr[j]; nsupc = SuperSize (k); if (Llu->Lrowind_bc_ptr[j]) nsupr = Llu->Lrowind_bc_ptr[j][1]; else nsupr = 0; #ifdef PI_DEBUG printf ("rank %d Iter %d k=%d \t dtrsm nsuper %d \n", iam, k0, k, nsupr); #endif ublk_ptr = ujrow = Llu->ujrow; luptr = 0; /* Point to the diagonal entries. */ cols_left = nsupc; /* supernode size */ int ld_ujrow = nsupc; /* leading dimension of ujrow */ u_diag_cnt = 0; incy = ld_ujrow; if ( U_diag_blk_send_req && U_diag_blk_send_req[myrow] != MPI_REQUEST_NULL ) { /* There are pending sends - wait for all Isend to complete */ #if ( PROFlevel>=1 ) TIC (t1); #endif for (pr = 0; pr < Pr; ++pr) { if (pr != myrow) { MPI_Wait (U_diag_blk_send_req + pr, &status); } } #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DIAG] += t2; #endif /* flag no more outstanding send request. 
*/ U_diag_blk_send_req[myrow] = MPI_REQUEST_NULL; } if (iam == pkk) { /* diagonal process */ /* ++++ First step compute diagonal block ++++++++++ */ for (j = 0; j < jlst - jfst; ++j) { /* for each column in panel */ /* Diagonal pivot */ i = luptr; /* Not to replace zero pivot. */ if (options->ReplaceTinyPivot == YES && lusup[i] != 0.0 ) { if (fabs (lusup[i]) < thresh) { /* Diagonal */ #if ( PRNTlevel>=2 ) printf ("(%d) .. col %d, tiny pivot %e ", iam, jfst + j, lusup[i]); #endif /* Keep the new diagonal entry with the same sign. */ if (lusup[i] < 0) lusup[i] = -thresh; else lusup[i] = thresh; #if ( PRNTlevel>=2 ) printf ("replaced by %e\n", lusup[i]); #endif ++(stat->TinyPivots); } } #if 0 for (l = 0; l < cols_left; ++l, i += nsupr, ++u_diag_cnt) ublk_ptr[u_diag_cnt] = lusup[i]; /* copy one row of U */ #endif /* storing U in full form */ int st; for (l = 0; l < cols_left; ++l, i += nsupr, ++u_diag_cnt) { st = j * ld_ujrow + j; ublk_ptr[st + l * ld_ujrow] = lusup[i]; /* copy one row of U */ } if ( ujrow[0] == zero ) { /* Test for singularity. */ *info = j + jfst + 1; } else { /* Scale the j-th column within diag. block. */ temp = 1.0 / ujrow[0]; for (i = luptr + 1; i < luptr - j + nsupc; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupc - j - 1; } /* Rank-1 update of the trailing submatrix within diag. block. */ if (--cols_left) { /* l = nsupr - j - 1; */ l = nsupc - j - 1; /* Piyush */ dger_ (&l, &cols_left, &alpha, &lusup[luptr + 1], &incx, &ujrow[ld_ujrow], &incy, &lusup[luptr + nsupr + 1], &nsupr); stat->ops[FACT] += 2 * l * cols_left; } /* ujrow = ublk_ptr + u_diag_cnt; */ ujrow = ujrow + ld_ujrow + 1; /* move to next row of U */ luptr += nsupr + 1; /* move to next column */ } /* for column j ... 
first loop */ /* ++++ Second step compute off-diagonal block with communication ++*/ ublk_ptr = ujrow = Llu->ujrow; if (U_diag_blk_send_req && iam == pkk) { /* Send the U block downward */ /** ALWAYS SEND TO ALL OTHERS - TO FIX **/ #if ( PROFlevel>=1 ) TIC (t1); #endif for (pr = 0; pr < Pr; ++pr) { if (pr != krow) { /* tag = ((k0<<2)+2) % tag_ub; */ /* tag = (4*(nsupers+k0)+2) % tag_ub; */ MPI_Isend (ublk_ptr, nsupc * nsupc, MPI_DOUBLE, pr, SLU_MPI_TAG (4, k0) /* tag */ , comm, U_diag_blk_send_req + pr); } } #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DIAG] += t2; #endif /* flag outstanding Isend */ U_diag_blk_send_req[krow] = (MPI_Request) TRUE; /* Sherry */ } /* pragma below would be changed by an MKL call */ l = nsupr - nsupc; // n = nsupc; double alpha = 1.0; #ifdef PI_DEBUG printf ("calling dtrsm\n"); printf ("dtrsm diagonal param 11: %d \n", nsupr); #endif #if defined (USE_VENDOR_BLAS) dtrsm_ ("R", "U", "N", "N", &l, &nsupc, &alpha, ublk_ptr, &ld_ujrow, &lusup[nsupc], &nsupr, 1, 1, 1, 1); #else dtrsm_ ("R", "U", "N", "N", &l, &nsupc, &alpha, ublk_ptr, &ld_ujrow, &lusup[nsupc], &nsupr); #endif stat->ops[FACT] += (flops_t) nsupc * (nsupc+1) * l; } else { /* non-diagonal process */ /* ================================================================== * * Receive the diagonal block of U for panel factorization of L(:,k). 
* * Note: we block for panel factorization of L(:,k), but panel * * factorization of U(:,k) do not block * * ================================================================== */ /* tag = ((k0<<2)+2) % tag_ub; */ /* tag = (4*(nsupers+k0)+2) % tag_ub; */ // printf("hello message receiving%d %d\n",(nsupc*(nsupc+1))>>1,SLU_MPI_TAG(4,k0)); #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Recv (ublk_ptr, (nsupc * nsupc), MPI_DOUBLE, krow, SLU_MPI_TAG (4, k0) /* tag */ , comm, &status); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DIAG] += t2; #endif if (nsupr > 0) { double alpha = 1.0; #ifdef PI_DEBUG printf ("dtrsm non diagonal param 11: %d \n", nsupr); if (!lusup) printf (" Rank :%d \t Empty block column occurred :\n", iam); #endif #if defined (USE_VENDOR_BLAS) dtrsm_ ("R", "U", "N", "N", &nsupr, &nsupc, &alpha, ublk_ptr, &ld_ujrow, lusup, &nsupr, 1, 1, 1, 1); #else dtrsm_ ("R", "U", "N", "N", &nsupr, &nsupc, &alpha, ublk_ptr, &ld_ujrow, lusup, &nsupr); #endif stat->ops[FACT] += (flops_t) nsupc * (nsupc+1) * nsupr; } } /* end if pkk ... */ /* printf("exiting pdgstrf2 %d \n", grid->iam); */ } /* PDGSTRF2_trsm */ /************************************************************************/ void pdgstrs2_omp /************************************************************************/ (int_t k0, int_t k, Glu_persist_t * Glu_persist, gridinfo_t * grid, LocalLU_t * Llu, SuperLUStat_t * stat) { #ifdef PI_DEBUG printf("====Entering pdgstrs2==== \n"); #endif int iam, pkk; int incx = 1; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int segsize; int nsupc; /* number of columns in the block */ int_t luptr, iukp, rukp; int_t b, gb, j, klst, knsupc, lk, nb; int_t *xsup = Glu_persist->xsup; int_t *usub; double *lusup, *uval; #if 0 //#ifdef USE_VTUNE __SSC_MARK(0x111);// start SDE tracing, note uses 2 underscores __itt_resume(); // start VTune, again use 2 underscores #endif /* Quick return. 
*/ lk = LBi (k, grid); /* Local block number */ if (!Llu->Unzval_br_ptr[lk]) return; /* Initialization. */ iam = grid->iam; pkk = PNUM (PROW (k, grid), PCOL (k, grid), grid); //int k_row_cycle = k / grid->nprow; /* for which cycle k exist (to assign rowwise thread blocking) */ //int gb_col_cycle; /* cycle through block columns */ klst = FstBlockC (k + 1); knsupc = SuperSize (k); usub = Llu->Ufstnz_br_ptr[lk]; /* index[] of block row U(k,:) */ uval = Llu->Unzval_br_ptr[lk]; if (iam == pkk) { lk = LBj (k, grid); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; } else { nsupr = Llu->Lsub_buf_2[k0 % (1 + stat->num_look_aheads)][1]; /* LDA of lusup[] */ lusup = Llu->Lval_buf_2[k0 % (1 + stat->num_look_aheads)]; } /////////////////////new-test////////////////////////// /* !! Taken from Carl/SuperLU_DIST_5.1.0/EXAMPLE/pdgstrf2_v3.c !! */ /* Master thread: set up pointers to each block in the row */ nb = usub[0]; iukp = BR_HEADER; rukp = 0; int* blocks_index_pointers = SUPERLU_MALLOC (3 * nb * sizeof(int)); int* blocks_value_pointers = blocks_index_pointers + nb; int* nsupc_temp = blocks_value_pointers + nb; for (b = 0; b < nb; b++) { /* set up pointers to each block */ blocks_index_pointers[b] = iukp + UB_DESCRIPTOR; blocks_value_pointers[b] = rukp; gb = usub[iukp]; rukp += usub[iukp+1]; nsupc = SuperSize( gb ); nsupc_temp[b] = nsupc; iukp += (UB_DESCRIPTOR + nsupc); /* move to the next block */ } // Sherry: this version is more NUMA friendly compared to pdgstrf2_v2.c // https://stackoverflow.com/questions/13065943/task-based-programming-pragma-omp-task-versus-pragma-omp-parallel-for #pragma omp parallel for schedule(static) default(shared) \ private(b,j,iukp,rukp,segsize) /* Loop through all the blocks in the row. */ for (b = 0; b < nb; ++b) { iukp = blocks_index_pointers[b]; rukp = blocks_value_pointers[b]; /* Loop through all the segments in the block. 
*/ for (j = 0; j < nsupc_temp[b]; j++) { segsize = klst - usub[iukp++]; if (segsize) { #pragma omp task default(shared) firstprivate(segsize,rukp) if (segsize > 30) { /* Nonzero segment. */ int_t luptr = (knsupc - segsize) * (nsupr + 1); //printf("[2] segsize %d, nsupr %d\n", segsize, nsupr); #if defined (USE_VENDOR_BLAS) dtrsv_ ("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx, 1, 1, 1); #else dtrsv_ ("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #endif } /* end task */ rukp += segsize; stat->ops[FACT] += segsize * (segsize + 1); } /* end if segsize > 0 */ } /* end for j in parallel ... */ /* #pragma omp taskwait */ } /* end for b ... */ /* Deallocate memory */ SUPERLU_FREE(blocks_index_pointers); #if 0 //#ifdef USE_VTUNE __itt_pause(); // stop VTune __SSC_MARK(0x222); // stop SDE tracing #endif } /* PDGSTRS2_omp */ SuperLU_DIST_5.3.0/SRC/pdgstrs1.c0000644013363400111340000006453013233431301015206 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of distributed linear equations * *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 *     October 15, 2008  use fewer MPI_Reduce
 * 
*/ #include "superlu_ddefs.h" #define ISEND_IRECV /* * Function prototypes */ #ifdef _CRAY fortran void STRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, double*, double*, int*, double*, int*); fortran void SGEMM(_fcd, _fcd, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif /*! \brief * *
 * Purpose
 * =======
 *
 * PDGSTRS1 solves a system of distributed linear equations
 *
 *                   op( sub(A) ) * X = sub( B )
 *
 * with a general N-by-N distributed matrix sub( A ) using the LU
 * factorization computed by PDGSTRF.
 *
 * This routine is used only in the iterative refinement routine
 * pdgsrfs_ABXglobal, assuming that the right-hand side is already
 * distributed in the diagonal processes.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures to store L and U factors,
 *        and the permutation vectors.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t' structure.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * x      (input/output) double*
 *        On entry, the right hand side matrix.
 *        On exit, the solution matrix if info = 0;
 *
 *        NOTE: the right-hand side matrix is already distributed on
 *              the diagonal processes.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves; 
 *        See SuperLUStat_t structure defined in util.h.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pdgstrs1(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, double *x, int nrhs, SuperLUStat_t *stat, int *info) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; double alpha = 1.0; double *lsum; /* Local running sum of the updates to B-components */ double *lusup, *dest; double *recvbuf, *tempv; double *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */ Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */ int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */ int iam, kcol, krow, mycol, myrow; int_t i, ii, il, j, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int_t Pc, Pr; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; double **Lnzval_bc_ptr; MPI_Status status; #ifdef ISEND_IRECV MPI_Request *send_req, recv_req; #endif /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */ int_t nleaf = 0, nroot = 0; /*-- Counts used for U-solve --*/ int_t *bmod; /* Modification count for L-solve. */ int_t **bsendx_plist = Llu->bsendx_plist; int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */ int_t *brecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. 
*/ double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */ t = SuperLU_timer_(); /* Test input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -8; if ( *info ) { pxerr_dist("PDGSTRS1", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ Llu->SolveMsgSent = 0; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrs1()"); #endif /* Save the count to be altered so it can be used by subsequent call to PDGSTRS1. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; #ifdef ISEND_IRECV k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb; if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); #endif #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Compute ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. */ knsupc = sp_ienv_dist(3); if ( !(lsum = doubleCalloc_dist(((size_t)ldalsum) * nrhs + nlb * LSUM_H)) ) ABORT("Calloc fails for lsum[]."); maxrecvsz = knsupc * nrhs + SUPERLU_MAX(XK_H, LSUM_H); if ( !(recvbuf = doubleMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doubleCalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. 
*---------------------------------------------------*/ /* * Prepended the block number in the header for lsum[]. */ for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H] = k; } } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol != kcol && fmod[lk] ) mod_bit[lk] = 1; /* contribution from off-diagonal */ } } /*PrintInt10("mod_bit", nlb, mod_bit);*/ /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, frecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* diagonal process */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && fmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) frecv[%4d] %2d\n", iam, k, frecv[lk]); assert( frecv[lk] < Pc ); #endif } } } #endif } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. 
--------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( !frecv[lk] && !fmod[lk] ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif /*stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs;*/ --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. 
*/ #if ( DEBUGlevel>=2 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. */ #ifdef ISEND_IRECV /* -MPI- FATAL: Remote protocol queue full */ MPI_Irecv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &recv_req ); MPI_Wait( &recv_req, &status ); #else MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); #endif k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ dlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif /*stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs;*/ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications. */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( DEBUGlevel>=2 ) if ( !iam ) printf("\n.. 
After L-solve: y =\n"); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); } MPI_Barrier( grid->comm ); } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); Llu->SolveMsgSent = 0; #endif /*--------------------------------------------------- * Back solve Ux = y. * * The Y components from the forward solve is already * on the diagonal processes. *---------------------------------------------------*/ /* Save the count to be altered so it can be used by subsequent call to PDGSTRS1. */ if ( !(bmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for bmod[]."); for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i]; if ( !(brecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for brecv[]."); Llu->brecv = brecv; /* * Compute brecv[] and nbrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) mod_bit[lk] = 1; /* Contribution from off-diagonal */ } } /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, brecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol == kcol ) { /* Diagonal process. 
*/ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #else for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #endif } /* Re-initialize lsum to zero. Each block header is already in place. */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { knsupc = SuperSize( k ); lk = LBi( k, grid ); il = LSUM_BLK( lk ); dest = &lsum[il]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = 0.0; } } /* Set up additional pointers for the index and value arrays of U. nlb is the number of local block rows. */ nub = CEILING( nsupers, Pc ); /* Number of local block columns. */ if ( !(Urbs = (int_t *) intCalloc_dist(2*((size_t)nub))) ) ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero blocks in a block column. */ Urbs1 = Urbs + nub; if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) ) ABORT("Malloc fails for Ucb_indptr[]"); if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) ) ABORT("Malloc fails for Ucb_valptr[]"); /* Count number of row blocks in a block column. One pass of the skeleton graph of U. */ for (lk = 0; lk < nlb; ++lk) { usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ /* usub[0] -- number of column blocks in this block row. */ #if ( DEBUGlevel>=2 ) Ublocks += usub[0]; #endif i = BR_HEADER; /* Pointer in index array. 
*/ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number */ ++Urbs[LBj(k,grid)]; i += UB_DESCRIPTOR + SuperSize( k ); } } } /* Set up the vertical linked lists for the row blocks. One pass of the skeleton graph of U. */ for (lb = 0; lb < nub; ++lb) if ( Urbs[lb] ) { /* Not an empty block column. */ if ( !(Ucb_indptr[lb] = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) ) ABORT("Malloc fails for Ucb_indptr[lb][]"); if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) ) ABORT("Malloc fails for Ucb_valptr[lb][]"); } for (lk = 0; lk < nlb; ++lk) { /* For each block row. */ usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ i = BR_HEADER; /* Pointer in index array. */ j = 0; /* Pointer in nzval array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number, column-wise. */ ljb = LBj( k, grid ); /* Local block number, column-wise. */ Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk; Ucb_indptr[ljb][Urbs1[ljb]].indpos = i; Ucb_valptr[ljb][Urbs1[ljb]] = j; ++Urbs1[ljb]; j += usub[i+1]; i += UB_DESCRIPTOR + SuperSize( k ); } } } #if ( DEBUGlevel>=2 ) for (p = 0; p < Pr*Pc; ++p) { if (iam == p) { printf("(%2d) .. Ublocks %d\n", iam, Ublocks); for (lb = 0; lb < nub; ++lb) { printf("(%2d) Local col %2d: # row blocks %2d\n", iam, lb, Urbs[lb]); if ( Urbs[lb] ) { for (i = 0; i < Urbs[lb]; ++i) printf("(%2d) .. row blk %2d:\ lbnum %d, indpos %d, valpos %d\n", iam, i, Ucb_indptr[lb][i].lbnum, Ucb_indptr[lb][i].indpos, Ucb_valptr[lb][i]); } } } MPI_Barrier( grid->comm ); } for (p = 0; p < Pr*Pc; ++p) { if ( iam == p ) { printf("\n(%d) bsendx_plist[][]", iam); for (lb = 0; lb < nub; ++lb) { printf("\n(%d) .. local col %2d: ", iam, lb); for (i = 0; i < Pr; ++i) printf("%4d", bsendx_plist[lb][i]); } printf("\n"); } MPI_Barrier( grid->comm ); } #endif /* DEBUGlevel */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. 
Setup U-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif /* * Solve the roots first by all the diagonal processes. */ #if ( DEBUGlevel>=2 ) printf("(%2d) nroot %4d\n", iam, nroot); #endif for (k = nsupers-1; k >= 0 && nroot; --k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process. */ knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number, row-wise. */ if ( !brecv[lk] && !bmod[lk] ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif /*stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs;*/ --nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications: lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if root ... */ } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. */ while ( nbrecvx || nbrecvmod ) { /* While not finished. */ /* Receive a message. 
*/ MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nbrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ dlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); break; case LSUM: --nbrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; if ( !(--brecv[lk]) && !bmod[lk] ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif /*stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs;*/ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii - XK_H], pi); #endif } /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if becomes solvable */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. U-solve time\t%8.2f\n", t); #endif stat->utime[SOLVE] = SuperLU_timer_() - t; /* Deallocate storage. */ SUPERLU_FREE(lsum); SUPERLU_FREE(recvbuf); for (i = 0; i < nub; ++i) if ( Urbs[i] ) { SUPERLU_FREE(Ucb_indptr[i]); SUPERLU_FREE(Ucb_valptr[i]); } SUPERLU_FREE(Ucb_indptr); SUPERLU_FREE(Ucb_valptr); SUPERLU_FREE(Urbs); SUPERLU_FREE(bmod); SUPERLU_FREE(brecv); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); SUPERLU_FREE(send_req); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrs1()"); #endif } /* PDGSTRS1 */ SuperLU_DIST_5.3.0/SRC/pdgstrsL.c0000644013363400111340000006450313233431301015241 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief Solves a lower triangular system L*X = B, with L being the * lower triangular factor computed previously by PDGSTRF. * *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 * 
*/ #include "superlu_ddefs.h" #define ISEND_IRECV /* * Function prototypes */ #ifdef _CRAY fortran void STRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, double*, double*, int*, double*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif /*! \brief * *
 * Purpose
 * =======
 *   Re-distribute B on the diagonal processes of the 2D process mesh.
 * 
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 *
 * Arguments
 * =========
 * 
 * B      (input) double*
 *        The distributed right-hand side matrix of the possibly
 *        equilibrated system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ilsum  (input) int* (global)
 *        Starting position of each supernode in a full array.
 *
 * x      (output) double*
 *        The solution vector. It is valid only on the diagonal processes.
 *
 * ScalePermstruct (input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * SOLVEstruct (input) SOLVEstruct_t*
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * Return value
 * ============
 *   Always returns 0.
 * 
*/

int_t
pdReDistribute_B_to_X(double *B, int_t m_loc, int nrhs, int_t ldb,
                      int_t fst_row, int_t *ilsum, double *x,
                      ScalePermstruct_t *ScalePermstruct,
                      Glu_persist_t *Glu_persist,
                      gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct)
{
    /*
     * Scatter the locally owned rows of B to the processes that hold the
     * matching X-blocks, using two all-to-all exchanges: one for the
     * permuted row indices and one for the numerical values.
     *
     * NOTE: xsup, supno, ilsum, nrhs and grid are presumably referenced by
     * name inside the BlockNum / SuperSize / FstBlockC / X_BLK / RHS_ITERATE
     * macros -- keep these identifiers as they are.
     */
    pxgstrs_comm_t *comm_plan = SOLVEstruct->gstrs_comm;
    int_t *row_perm = ScalePermstruct->perm_r; /* row permutation vector */
    int_t *col_perm = ScalePermstruct->perm_c; /* column permutation vector */
    int_t *xsup  = Glu_persist->xsup;
    int_t *supno = Glu_persist->supno;
    int    nprocs = grid->nprow * grid->npcol;

    /* The count/displacement tables live back to back in one array,
       each slice holding one entry per process. */
    int *tables       = comm_plan->B_to_X_SendCnt;
    int *SendCnt      = tables;
    int *SendCnt_nrhs = tables +   nprocs;
    int *RecvCnt      = tables + 2*nprocs;
    int *RecvCnt_nrhs = tables + 3*nprocs;
    int *sdispls      = tables + 4*nprocs;
    int *sdispls_nrhs = tables + 5*nprocs;
    int *rdispls      = tables + 6*nprocs;
    int *rdispls_nrhs = tables + 7*nprocs;
    int *ptr_to_ibuf  = comm_plan->ptr_to_ibuf; /* next free index slot */
    int *ptr_to_dbuf  = comm_plan->ptr_to_dbuf; /* next free value slot */

    int_t  *send_ibuf, *recv_ibuf;
    double *send_dbuf, *recv_dbuf;
    int_t   nsend, nrecv;  /* total number of rows sent / received */
    int_t   i, r, recv_idx;
    int     p;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pdReDistribute_B_to_X()");
#endif

    /* ------------------------------------------------------------
       Allocate one buffer per data kind; the receive part follows
       the send part in the same allocation.
       ------------------------------------------------------------*/
    nsend = sdispls[nprocs-1] + SendCnt[nprocs-1];
    nrecv = rdispls[nprocs-1] + RecvCnt[nprocs-1];

    send_ibuf = intMalloc_dist(nsend + nrecv);
    if ( !send_ibuf )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + nsend;

    send_dbuf = doubleMalloc_dist((nsend + nrecv) * (size_t)nrhs);
    if ( !send_dbuf )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + nsend * nrhs;

    /* Reset the per-destination write cursors. */
    for (p = 0; p < nprocs; ++p) {
        ptr_to_ibuf[p] = sdispls[p];
        ptr_to_dbuf[p] = sdispls[p] * nrhs;
    }

    /* ------------------------------------------------------------
       Pack: for every local row, store its permuted global index
       and its nrhs values (row major) in the slot of the target
       process.
       ------------------------------------------------------------*/
    for (i = 0; i < m_loc; ++i) {
        int_t grow  = col_perm[row_perm[fst_row + i]]; /* Row number in Pc*Pr*B */
        int_t gblk  = BlockNum( grow );
        int   owner = PNUM( PROW(gblk,grid), PCOL(gblk,grid), grid ); /* Diagonal process */
        int_t vpos  = ptr_to_dbuf[owner];

        send_ibuf[ptr_to_ibuf[owner]] = grow;
        RHS_ITERATE(r) { /* RHS is stored in row major in the buffer. */
            send_dbuf[vpos++] = B[i + r*ldb];
        }
        ++ptr_to_ibuf[owner];
        ptr_to_dbuf[owner] += nrhs;
    }

    /* Communicate the (permuted) row indices. */
    MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t,
                  recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm);

    /* Communicate the numerical values. */
    MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, MPI_DOUBLE,
                  recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, MPI_DOUBLE,
                  grid->comm);

    /* ------------------------------------------------------------
       Unpack the received rows into X.  Only the diagonal processes
       do this; the off-diagonal processes have 0 RecvCnt.
       ------------------------------------------------------------*/
    recv_idx = 0;
    for (p = 0; p < nprocs; ++p) {
        int_t vpos = rdispls_nrhs[p];

        for (i = 0; i < RecvCnt[p]; ++i, ++recv_idx) {
            int_t grow   = recv_ibuf[recv_idx]; /* The permuted row index. */
            int_t gblk   = BlockNum( grow );
            int_t width  = SuperSize( gblk );
            int_t lblk   = LBi( gblk, grid );   /* Local block number. */
            int_t xstart = X_BLK( lblk );
            int_t rel    = grow - FstBlockC( gblk ); /* Relative row number in X-block */

            x[xstart - XK_H] = gblk; /* Block number prepended in the header. */
            RHS_ITERATE(r) {
                x[xstart + rel + r*width] = recv_dbuf[vpos++];
            }
        }
    }

    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pdReDistribute_B_to_X()");
#endif
    return 0;
} /* pdReDistribute_B_to_X */

/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *   Re-distribute X on the diagonal processes to B distributed on all
 *   the processes.
 *
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 * 
*/ int_t pdReDistribute_X_to_B(int_t n, double *B, int_t m_loc, int_t ldb, int_t fst_row, int_t nrhs, double *x, int_t *ilsum, ScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct) { int_t i, ii, irow, j, jj, k, knsupc, nsupers, l, lk; int_t *xsup, *supno; int *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs; int *sdispls, *rdispls, *sdispls_nrhs, *rdispls_nrhs; int *ptr_to_ibuf, *ptr_to_dbuf; int_t *send_ibuf, *recv_ibuf; double *send_dbuf, *recv_dbuf; int_t *row_to_proc = SOLVEstruct->row_to_proc; /* row-process mapping */ pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm; int iam, p, q, pkk, procs; int_t num_diag_procs, *diag_procs; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pdReDistribute_X_to_B()"); #endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = Glu_persist->supno[n-1] + 1; iam = grid->iam; procs = grid->nprow * grid->npcol; SendCnt = gstrs_comm->X_to_B_SendCnt; SendCnt_nrhs = gstrs_comm->X_to_B_SendCnt + procs; RecvCnt = gstrs_comm->X_to_B_SendCnt + 2*procs; RecvCnt_nrhs = gstrs_comm->X_to_B_SendCnt + 3*procs; sdispls = gstrs_comm->X_to_B_SendCnt + 4*procs; sdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 5*procs; rdispls = gstrs_comm->X_to_B_SendCnt + 6*procs; rdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 7*procs; ptr_to_ibuf = gstrs_comm->ptr_to_ibuf; ptr_to_dbuf = gstrs_comm->ptr_to_dbuf; k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */ l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */ if ( !(send_ibuf = intMalloc_dist(k + l)) ) ABORT("Malloc fails for send_ibuf[]."); recv_ibuf = send_ibuf + k; if ( !(send_dbuf = doubleMalloc_dist((k + l)*nrhs)) ) ABORT("Malloc fails for send_dbuf[]."); recv_dbuf = send_dbuf + k * nrhs; for (p = 0; p < procs; ++p) { ptr_to_ibuf[p] = sdispls[p]; 
ptr_to_dbuf[p] = sdispls_nrhs[p]; } num_diag_procs = SOLVEstruct->num_diag_procs; diag_procs = SOLVEstruct->diag_procs; for (p = 0; p < num_diag_procs; ++p) { /* For all diagonal processes. */ pkk = diag_procs[p]; if ( iam == pkk ) { for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number */ irow = FstBlockC( k ); l = X_BLK( lk ); for (i = 0; i < knsupc; ++i) { #if 0 ii = inv_perm_c[irow]; /* Apply X <== Pc'*Y */ #else ii = irow; #endif q = row_to_proc[ii]; jj = ptr_to_ibuf[q]; send_ibuf[jj] = ii; jj = ptr_to_dbuf[q]; RHS_ITERATE(j) { /* RHS stored in row major in buffer. */ send_dbuf[jj++] = x[l + i + j*knsupc]; } ++ptr_to_ibuf[q]; ptr_to_dbuf[q] += nrhs; ++irow; } } } } /* ------------------------------------------------------------ COMMUNICATE THE (PERMUTED) ROW INDICES AND NUMERICAL VALUES. ------------------------------------------------------------*/ MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t, recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm); MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, MPI_DOUBLE, recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, MPI_DOUBLE, grid->comm); /* ------------------------------------------------------------ COPY THE BUFFER INTO B. ------------------------------------------------------------*/ for (i = 0, k = 0; i < m_loc; ++i) { irow = recv_ibuf[i]; irow -= fst_row; /* Relative row number */ RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */ B[irow + j*ldb] = recv_dbuf[k++]; } } SUPERLU_FREE(send_ibuf); SUPERLU_FREE(send_dbuf); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Exit pdReDistribute_X_to_B()"); #endif return 0; } /* pdReDistribute_X_to_B */ /*! \brief * *
 * Purpose
 * =======
 *
 * PDGSTRSL solves a lower triangular system L*X = B,  with L being the
 * lower triangular factor computed previously by PDGSTRF.
 * If the equilibration, and row and column permutations were performed,
 * the LU factorization was performed for A1 where
 *     A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 * and the linear system solved is
 *     A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and
 * the permutation to B1 by Pc*Pr is applied internally in this routine.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from PDGSTRF for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *        A may be scaled and permuted into A1, so that
 *        A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) double*
 *        On entry, the distributed right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *        On exit, the distributed solution matrix Y of the possibly
 *        equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X,
 *        and X is the solution of the original system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ldb    (input) int (local)
 *        The leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 * 
 * SOLVEstruct (output) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pdgstrsL(int_t n, LUstruct_t *LUstruct, ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid, double *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, SOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; double alpha = 1.0; double zero = 0.0; double *lsum; /* Local running sum of the updates to B-components */ double *x; /* X component at step k. */ /* NOTE: x and lsum are of same size. */ double *lusup, *dest; double *recvbuf, *tempv; double *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t iam, kcol, krow, mycol, myrow; int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *supno, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int_t Pc, Pr; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; double **Lnzval_bc_ptr; MPI_Status status; #ifdef ISEND_IRECV MPI_Request *send_req, recv_req; #endif pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm; /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve -- Count the number of local block products to be summed into lsum[lk]. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of lsum[lk] contributions to be received from processes in this row. It is only valid on the diagonal processes. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */ int_t nleaf = 0, nroot = 0; double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */ t = SuperLU_timer_(); /* Test input parameters. 
*/ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -9; if ( *info ) { pxerbla("PDGSTRS", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = supno[n-1] + 1; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrsL()"); #endif stat->ops[SOLVE] = 0.0; Llu->SolveMsgSent = 0; /* Save the count to be altered so it can be used by subsequent call to PDGSTRS. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; #ifdef ISEND_IRECV k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb; if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); #endif #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Obtain ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. */ knsupc = sp_ienv_dist(3); maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H ); if ( !(lsum = doubleCalloc_dist(((size_t)ldalsum)*nrhs + nlb*LSUM_H)) ) ABORT("Calloc fails for lsum[]."); if ( !(x = doubleMalloc_dist(ldalsum * nrhs + nlb * XK_H)) ) ABORT("Malloc fails for x[]."); if ( !(recvbuf = doubleMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doubleCalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. *---------------------------------------------------*/ /* Redistribute B into X on the diagonal processes. 
*/ pdReDistribute_B_to_X(B, m_loc, nrhs, ldb, fst_row, ilsum, x, ScalePermstruct, Glu_persist, grid, SOLVEstruct); /* Set up the headers in lsum[]. */ ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H] = k; /* Block number prepended in the header. */ } ii += knsupc; } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol != kcol && fmod[lk] ) mod_bit[lk] = 1; /* contribution from off-diagonal */ } } /*PrintInt10("mod_bit", nlb, mod_bit);*/ #if ( PROFlevel>=2 ) t_reduce_tmp = SuperLU_timer_(); #endif /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, frecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); #if ( PROFlevel>=2 ) t_reduce += SuperLU_timer_() - t_reduce_tmp; #endif for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* diagonal process */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; } } } } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. --------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( frecv[lk]==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. 
*/ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs; --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } } /* if diagonal process ... */ } /* for k ... */ /* ----------------------------------------------------------- Compute the internal nodes asynchronously by all processes. ----------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. 
*/ #ifdef ISEND_IRECV /* -MPI- FATAL: Remote protocol queue full */ MPI_Irecv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &recv_req ); MPI_Wait( &recv_req, &status ); #else MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); #endif k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ dlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: /* Receiver must be a diagonal process */ --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; } if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii-XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications. */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( DEBUGlevel==2 ) { printf("(%d) .. After L-solve: y =\n", iam); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); fflush(stdout); } MPI_Barrier( grid->comm ); } } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); Llu->SolveMsgSent = 0; #endif /* Re-distribute X on the diagonal processes to B distributed on all the processes. */ pdReDistribute_X_to_B(n, B, m_loc, ldb, fst_row, nrhs, x, ilsum, ScalePermstruct, Glu_persist, grid, SOLVEstruct); /* Deallocate storage. 
*/ SUPERLU_FREE(lsum); SUPERLU_FREE(x); SUPERLU_FREE(recvbuf); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); SUPERLU_FREE(send_req); #endif stat->utime[SOLVE] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrsL()"); #endif } /* PDGSTRS */ SuperLU_DIST_5.3.0/SRC/dutil_dist.c0000644013363400111340000004156413233431301015605 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Several matrix utilities * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 */

#include <math.h>
#include "superlu_ddefs.h"

/*! \brief Wrap caller-owned compressed-column arrays in a SuperMatrix.
 *
 * Fills the descriptor fields of A and allocates an NCformat record that
 * points at nzval, rowind and colptr (the arrays themselves are not copied).
 * Aborts through ABORT() if the record cannot be allocated.
 */
void
dCreate_CompCol_Matrix_dist(SuperMatrix *A, int_t m, int_t n, int_t nnz, 
			    double *nzval, int_t *rowind, int_t *colptr,
			    Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
    NCformat *store;

    /* Matrix descriptor. */
    A->nrow  = m;
    A->ncol  = n;
    A->Stype = stype;
    A->Dtype = dtype;
    A->Mtype = mtype;

    /* Storage record. */
    store = (NCformat *) SUPERLU_MALLOC( sizeof(NCformat) );
    A->Store = (void *) store;
    if ( !store ) ABORT("SUPERLU_MALLOC fails for A->Store");

    store->nnz    = nnz;
    store->colptr = colptr;
    store->rowind = rowind;
    store->nzval  = nzval;
}

/*! \brief Wrap caller-owned local compressed-row arrays in a SuperMatrix.
 *
 * Fills the descriptor fields of A and allocates an NRformat_loc record
 * holding the local sizes (nnz_loc, m_loc, fst_row) and pointers to nzval,
 * colind and rowptr (the arrays themselves are not copied).
 * Aborts through ABORT() if the record cannot be allocated.
 */
void
dCreate_CompRowLoc_Matrix_dist(SuperMatrix *A, int_t m, int_t n,
			       int_t nnz_loc, int_t m_loc, int_t fst_row,
			       double *nzval, int_t *colind, int_t *rowptr,
			       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
    NRformat_loc *store;

    /* Matrix descriptor. */
    A->nrow  = m;
    A->ncol  = n;
    A->Stype = stype;
    A->Dtype = dtype;
    A->Mtype = mtype;

    /* Storage record. */
    store = (NRformat_loc *) SUPERLU_MALLOC( sizeof(NRformat_loc) );
    A->Store = (void *) store;
    if ( !store ) ABORT("SUPERLU_MALLOC fails for A->Store");

    store->m_loc   = m_loc;
    store->fst_row = fst_row;
    store->nnz_loc = nnz_loc;
    store->rowptr  = rowptr;
    store->colind  = colind;
    store->nzval   = nzval;
}

/*! \brief Convert a row compressed storage into a column compressed storage.
 *
 * Inputs:  m-by-n matrix with nnz entries given as (a, colind, rowptr).
 * Outputs: newly allocated arrays *at (values), *rowind (row indices) and
 *          *colptr (n+1 column pointers); the caller owns and must free them.
 *
 * All indices are int_t so that the conversion does not truncate when
 * int_t is wider than int.  Allocation failures abort through ABORT().
 */
void
dCompRow_to_CompCol_dist(int_t m, int_t n, int_t nnz, 
                         double *a, int_t *colind, int_t *rowptr,
                         double **at, int_t **rowind, int_t **colptr)
{
    int_t i, j, col, relpos;
    int_t *marker;

    /* Allocate storage for another copy of the matrix. */
    *at = (double *) doubleMalloc_dist(nnz);
    *rowind = intMalloc_dist(nnz);
    *colptr = intMalloc_dist(n+1);
    marker = intCalloc_dist(n);

    /* A zero-sized request may legitimately yield NULL, so only treat
       NULL as a failure when a non-empty array was requested. */
    if ( nnz > 0 && (!*at || !*rowind) )
	ABORT("Malloc fails for at[] or rowind[].");
    if ( !*colptr ) ABORT("Malloc fails for colptr[].");
    if ( n > 0 && !marker ) ABORT("Calloc fails for marker[].");
    
    /* Get counts of each column of A, and set up column pointers */
    for (i = 0; i < m; ++i)
	for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
    (*colptr)[0] = 0;
    for (j = 0; j < n; ++j) {
	(*colptr)[j+1] = (*colptr)[j] + marker[j];
	marker[j] = (*colptr)[j]; /* next free slot in column j */
    }

    /* Transfer the matrix into the compressed column storage. */
    for (i = 0; i < m; ++i) {
	for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
	    col = colind[j];
	    relpos = marker[col];
	    (*rowind)[relpos] = i;
	    (*at)[relpos] = a[j];
	    ++marker[col];
	}
    }

    SUPERLU_FREE(marker);
}

/*! \brief Copy matrix A into matrix B.
 *
 * Both matrices use NCformat storage.  B->Store and its nzval, rowind and
 * colptr arrays are written to without being allocated here, so the caller
 * must supply them with room for A's nnz entries and ncol+1 column pointers.
 * Counters are int_t so that large matrices are not truncated.
 */
void
dCopy_CompCol_Matrix_dist(SuperMatrix *A, SuperMatrix *B)
{
    NCformat *Astore, *Bstore;
    int_t    ncol, nnz, i;

    B->Stype = A->Stype;
    B->Dtype = A->Dtype;
    B->Mtype = A->Mtype;
    B->nrow  = A->nrow;
    B->ncol  = ncol = A->ncol;
    Astore   = (NCformat *) A->Store;
    Bstore   = (NCformat *) B->Store;
    Bstore->nnz = nnz = Astore->nnz;
    for (i = 0; i < nnz; ++i)
	((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
}


/*! \brief Print a compressed-column (NCformat) matrix to stdout.
 *
 * Prints the type tags, the dimensions, then nzval (skipped when NULL),
 * rowind and colptr.  The loop index is int_t so that it can reach
 * Astore->nnz and A->ncol without truncation.
 */
void dPrint_CompCol_Matrix_dist(SuperMatrix *A)
{
    NCformat     *Astore;
    int_t        i;
    double       *dp;
    
    printf("\nCompCol matrix: ");
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    Astore = (NCformat *) A->Store;
    printf("nrow %lld, ncol %lld, nnz %lld\n", (long long) A->nrow,
	    (long long) A->ncol, (long long) Astore->nnz);
    if ( (dp = (double *) Astore->nzval) != NULL ) {
        printf("nzval:\n");
        for (i = 0; i < Astore->nnz; ++i) printf("%f  ", dp[i]);
    }
    printf("\nrowind:\n");
    for (i = 0; i < Astore->nnz; ++i) 
        printf("%lld  ", (long long) Astore->rowind[i]);
    printf("\ncolptr:\n");
    for (i = 0; i <= A->ncol; ++i) 
        printf("%lld  ", (long long) Astore->colptr[i]);
    printf("\nend CompCol matrix.\n");
}

/*! \brief Print a dense (DNformat) matrix to stdout.
 *
 * Prints the type tags and dimensions, then every entry column by column,
 * using the leading dimension lda to step between columns.  Previously only
 * the first nrow values (i.e. the first column) were printed; for a
 * single-column matrix the output is unchanged.
 */
void dPrint_Dense_Matrix_dist(SuperMatrix *A)
{
    DNformat     *Astore;
    int_t        i, j, lda;
    double       *dp;
    
    printf("\nDense matrix: ");
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
    Astore = (DNformat *) A->Store;
    dp = (double *) Astore->nzval;
    lda = Astore->lda;
    printf("nrow %lld, ncol %lld, lda %lld\n", 
        (long long) A->nrow, (long long) A->ncol, (long long) Astore->lda);
    printf("\nnzval: ");
    for (j = 0; j < A->ncol; ++j) {
        if ( j > 0 ) printf("\n"); /* one output line per column */
        for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i + j*lda]);
    }
    printf("\nend Dense matrix.\n");
}

/*! \brief Print a local compressed-row (NRformat_loc) matrix to stdout.
 *
 * Prints the type tags, the global dimensions, the local sizes, then
 * rowptr, colind and (when not NULL) nzval.  Always returns 0.
 */
int dPrint_CompRowLoc_Matrix_dist(SuperMatrix *A)
{
    NRformat_loc  *store;
    int_t  nz_local, rows_local;
    double  *vals;
    
    printf("\n==== CompRowLoc matrix: ");
    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);

    store = (NRformat_loc *) A->Store;
    printf("nrow %ld, ncol %ld\n", 
            (long int) A->nrow, (long int) A->ncol);

    rows_local = store->m_loc;
    nz_local = store->nnz_loc;
    printf("nnz_loc %ld, m_loc %ld, fst_row %ld\n", (long int) nz_local, 
            (long int) rows_local, (long int) store->fst_row);

    PrintInt10("rowptr", rows_local+1, store->rowptr);
    PrintInt10("colind", nz_local, store->colind);

    vals = (double *) store->nzval;
    if ( vals ) {
        PrintDouble5("nzval", nz_local, vals);
    }

    printf("==== end CompRowLoc matrix\n");
    return 0;
}

/*! \brief Print a local compressed-row (NRformat_loc) matrix to a stream.
 *
 * Same output as the stdout variant, written to fp: type tags, global
 * dimensions, local sizes, then rowptr, colind and (when not NULL) nzval.
 * Always returns 0.
 */
int file_dPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A)
{
    NRformat_loc     *store;
    int_t  nz_local, rows_local;
    double       *vals;
    
    fprintf(fp, "\n==== CompRowLoc matrix: ");
    fprintf(fp, "Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);

    store = (NRformat_loc *) A->Store;
    fprintf(fp, "nrow %ld, ncol %ld\n", (long int) A->nrow, (long int) A->ncol);

    rows_local = store->m_loc;
    nz_local = store->nnz_loc;
    fprintf(fp, "nnz_loc %ld, m_loc %ld, fst_row %ld\n", (long int) nz_local,
            (long int) rows_local, (long int) store->fst_row);

    file_PrintInt10(fp, "rowptr", rows_local+1, store->rowptr);
    file_PrintInt10(fp, "colind", nz_local, store->colind);

    vals = (double *) store->nzval;
    if ( vals ) {
        file_PrintDouble5(fp, "nzval", nz_local, vals);
    }

    fprintf(fp, "==== end CompRowLoc matrix\n");
    return 0;
}

/*! \brief Wrap a caller-owned dense array in a SuperMatrix.
 *
 * Fills the descriptor fields of X and allocates a DNformat record that
 * stores the leading dimension ldx and a pointer to x (x is not copied).
 * Aborts through ABORT() if the record cannot be allocated.
 */
void
dCreate_Dense_Matrix_dist(SuperMatrix *X, int_t m, int_t n, double *x,
			  int_t ldx, Stype_t stype, Dtype_t dtype,
			  Mtype_t mtype)
{
    DNformat    *store;
    
    /* Matrix descriptor. */
    X->nrow  = m;
    X->ncol  = n;
    X->Stype = stype;
    X->Dtype = dtype;
    X->Mtype = mtype;

    /* Storage record. */
    store = (DNformat *) SUPERLU_MALLOC( sizeof(DNformat) );
    X->Store = (void *) store;
    if ( !store ) ABORT("SUPERLU_MALLOC fails for X->Store");

    store->nzval = (double *) x;
    store->lda   = ldx;
}

void
dCopy_Dense_Matrix_dist(int_t M, int_t N, double *X, int_t ldx,
			double *Y, int_t ldy)
{
/*! \brief
 *
 * 
 *  Purpose
 *  =======
 *
 *  Copies a two-dimensional matrix X to another matrix Y.
 * 
*/ int i, j; for (j = 0; j < N; ++j) for (i = 0; i < M; ++i) Y[i + j*ldy] = X[i + j*ldx]; } void dCreate_SuperNode_Matrix_dist(SuperMatrix *L, int_t m, int_t n, int_t nnz, double *nzval, int_t *nzval_colptr, int_t *rowind, int_t *rowind_colptr, int_t *col_to_sup, int_t *sup_to_col, Stype_t stype, Dtype_t dtype, Mtype_t mtype) { SCformat *Lstore; L->Stype = stype; L->Dtype = dtype; L->Mtype = mtype; L->nrow = m; L->ncol = n; L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ); if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store"); Lstore = L->Store; Lstore->nnz = nnz; Lstore->nsuper = col_to_sup[n]; Lstore->nzval = nzval; Lstore->nzval_colptr = nzval_colptr; Lstore->rowind = rowind; Lstore->rowind_colptr = rowind_colptr; Lstore->col_to_sup = col_to_sup; Lstore->sup_to_col = sup_to_col; } void dGenXtrue_dist(int_t n, int_t nrhs, double *x, int_t ldx) { int i, j; for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) { if ( i % 2 ) x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/ else x[i + j*ldx] = 1.0; } } /*! \brief Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's */ void dFillRHS_dist(char *trans, int_t nrhs, double *x, int_t ldx, SuperMatrix *A, double *rhs, int_t ldb) { double one = 1.0; double zero = 0.0; sp_dgemm_dist(trans, nrhs, one, A, x, ldx, zero, rhs, ldb); } /*! \brief Fills a double precision array with a given value. */ void dfill_dist(double *a, int_t alen, double dval) { register int_t i; for (i = 0; i < alen; i++) a[i] = dval; } /*! 
\brief Check the inf-norm of the error vector */ void dinf_norm_error_dist(int_t n, int_t nrhs, double *x, int_t ldx, double *xtrue, int_t ldxtrue, gridinfo_t *grid) { double err, xnorm; double *x_work, *xtrue_work; int i, j; for (j = 0; j < nrhs; j++) { x_work = &x[j*ldx]; xtrue_work = &xtrue[j*ldxtrue]; err = xnorm = 0.0; for (i = 0; i < n; i++) { err = SUPERLU_MAX(err, fabs(x_work[i] - xtrue_work[i])); xnorm = SUPERLU_MAX(xnorm, fabs(x_work[i])); } err = err / xnorm; printf("\tRHS %2d: ||X-Xtrue||/||X|| = %e\n", j, err); } } void PrintDouble5(char *name, int_t len, double *x) { register int_t i; printf("%10s:", name); for (i = 0; i < len; ++i) { if ( i % 5 == 0 ) printf("\n[%ld-%ld] ", (long int) i, (long int) i+4); printf("%14e", x[i]); } printf("\n"); } int file_PrintDouble5(FILE *fp, char *name, int_t len, double *x) { register int_t i; fprintf(fp, "%10s:", name); for (i = 0; i < len; ++i) { if ( i % 5 == 0 ) fprintf(fp, "\n[%ld-%ld] ", (long int) i, (long int) i+4); fprintf(fp, "%14e", x[i]); } fprintf(fp, "\n"); return 0; } /*! \brief Print the blocks in the factored matrix L. 
*/ void dPrintLblocks(int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, LocalLU_t *Llu) { register int c, extra, gb, j, lb, nsupc, nsupr, len, nb, ncb; register int_t k, mycol, r; int_t *xsup = Glu_persist->xsup; int_t *index; double *nzval; printf("\n[%d] L BLOCKS IN COLUMN-MAJOR ORDER -->\n", iam); ncb = nsupers / grid->npcol; extra = nsupers % grid->npcol; mycol = MYCOL( iam, grid ); if ( mycol < extra ) ++ncb; for (lb = 0; lb < ncb; ++lb) { index = Llu->Lrowind_bc_ptr[lb]; if ( index ) { /* Not an empty column */ nzval = Llu->Lnzval_bc_ptr[lb]; nb = index[0]; nsupr = index[1]; gb = lb * grid->npcol + mycol; nsupc = SuperSize( gb ); printf("[%d] block column %d (local # %d), nsupc %d, # row blocks %d\n", iam, gb, lb, nsupc, nb); for (c = 0, k = BC_HEADER, r = 0; c < nb; ++c) { len = index[k+1]; printf("[%d] row-block %d: block # " IFMT "\tlength %d\n", iam, c, index[k], len); PrintInt10("lsub", len, &index[k+LB_DESCRIPTOR]); for (j = 0; j < nsupc; ++j) { PrintDouble5("nzval", len, &nzval[r + j*nsupr]); } k += LB_DESCRIPTOR + len; r += len; } } printf("(%d)", iam); PrintInt32("ToSendR[]", grid->npcol, Llu->ToSendR[lb]); PrintInt10("fsendx_plist[]", grid->nprow, Llu->fsendx_plist[lb]); } printf("nfrecvx " IFMT "\n", Llu->nfrecvx); k = CEILING( nsupers, grid->nprow ); PrintInt10("fmod", k, Llu->fmod); } /* DPRINTLBLOCKS */ /*! \brief Print the blocks in the factored matrix U. 
*/ void dPrintUblocks(int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, LocalLU_t *Llu) { register int c, extra, jb, k, lb, len, nb, nrb, nsupc; register int_t myrow, r; int_t *xsup = Glu_persist->xsup; int_t *index; double *nzval; printf("\n[%d] U BLOCKS IN ROW-MAJOR ORDER -->\n", iam); nrb = nsupers / grid->nprow; extra = nsupers % grid->nprow; myrow = MYROW( iam, grid ); if ( myrow < extra ) ++nrb; for (lb = 0; lb < nrb; ++lb) { index = Llu->Ufstnz_br_ptr[lb]; if ( index ) { /* Not an empty row */ nzval = Llu->Unzval_br_ptr[lb]; nb = index[0]; printf("[%d] block row " IFMT " (local # %d), # column blocks %d\n", iam, lb*grid->nprow+myrow, lb, nb); r = 0; for (c = 0, k = BR_HEADER; c < nb; ++c) { jb = index[k]; len = index[k+1]; printf("[%d] col-block %d: block # %d\tlength " IFMT "\n", iam, c, jb, index[k+1]); nsupc = SuperSize( jb ); PrintInt10("fstnz", nsupc, &index[k+UB_DESCRIPTOR]); PrintDouble5("nzval", len, &nzval[r]); k += UB_DESCRIPTOR + nsupc; r += len; } printf("[%d] ToSendD[] %d\n", iam, Llu->ToSendD[lb]); } } } /* DPRINTUBLOCKS */ int dprint_gsmv_comm(FILE *fp, int_t m_loc, pdgsmv_comm_t *gsmv_comm, gridinfo_t *grid) { int_t procs = grid->nprow*grid->npcol; fprintf(fp, "TotalIndSend " IFMT "\tTotalValSend " IFMT "\n", gsmv_comm->TotalIndSend, gsmv_comm->TotalValSend); file_PrintInt10(fp, "extern_start", m_loc, gsmv_comm->extern_start); file_PrintInt10(fp, "ind_tosend", gsmv_comm->TotalIndSend, gsmv_comm->ind_tosend); file_PrintInt10(fp, "ind_torecv", gsmv_comm->TotalValSend, gsmv_comm->ind_torecv); file_PrintInt10(fp, "ptr_ind_tosend", procs+1, gsmv_comm->ptr_ind_tosend); file_PrintInt10(fp, "ptr_ind_torecv", procs+1, gsmv_comm->ptr_ind_torecv); file_PrintInt32(fp, "SendCounts", procs, gsmv_comm->SendCounts); file_PrintInt32(fp, "RecvCounts", procs, gsmv_comm->RecvCounts); return 0; } void GenXtrueRHS(int nrhs, SuperMatrix *A, Glu_persist_t *Glu_persist, gridinfo_t *grid, double **xact, int *ldx, double **b, int *ldb) { int_t gb, 
gbrow, i, iam, irow, j, lb, lsup, myrow, n, nlrows, nsupr, nsupers, rel; int_t *supno, *xsup, *lxsup; double *x, *bb; NCformat *Astore; double *Aval; n = A->ncol; *ldb = 0; supno = Glu_persist->supno; xsup = Glu_persist->xsup; nsupers = supno[n-1] + 1; iam = grid->iam; myrow = MYROW( iam, grid ); Astore = (NCformat *) A->Store; Aval = (double *) Astore->nzval; lb = CEILING( nsupers, grid->nprow ) + 1; if ( !(lxsup = intMalloc_dist(lb)) ) ABORT("Malloc fails for lxsup[]."); lsup = 0; nlrows = 0; for (j = 0; j < nsupers; ++j) { i = PROW( j, grid ); if ( myrow == i ) { nsupr = SuperSize( j ); *ldb += nsupr; lxsup[lsup++] = nlrows; nlrows += nsupr; } } *ldx = n; if ( !(x = doubleMalloc_dist(((size_t)*ldx) * nrhs)) ) ABORT("Malloc fails for x[]."); if ( !(bb = doubleCalloc_dist(*ldb * nrhs)) ) ABORT("Calloc fails for bb[]."); for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) x[i + j*(*ldx)] = 1.0; /* Form b = A*x. */ for (j = 0; j < n; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; gb = supno[irow]; gbrow = PROW( gb, grid ); if ( myrow == gbrow ) { rel = irow - xsup[gb]; lb = LBi( gb, grid ); bb[lxsup[lb] + rel] += Aval[i] * x[j]; } } /* Memory allocated but not freed: xact, b */ *xact = x; *b = bb; SUPERLU_FREE(lxsup); #if ( PRNTlevel>=2 ) for (i = 0; i < grid->nprow*grid->npcol; ++i) { if ( iam == i ) { printf("\n(%d)\n", iam); PrintDouble5("rhs", *ldb, *b); } MPI_Barrier( grid->comm ); } #endif } /* GENXTRUERHS */ /* g5.rua b = A*x y = L\b 0 1 1.0000 1 0 0.2500 2 1 1.0000 3 2 2.0000 4 1 1.7500 5 1 1.8917 6 0 1.1879 7 2 2.0000 8 2 2.0000 9 1 1.0000 10 1 1.7500 11 0 0 12 1 1.8750 13 2 2.0000 14 1 1.0000 15 0 0.2500 16 1 1.7667 17 0 0.6419 18 1 2.2504 19 0 1.1563 20 0 0.9069 21 0 1.4269 22 1 2.7510 23 1 2.2289 24 0 2.4332 g6.rua b=A*x y=L\b 0 0 0 1 1 1.0000 2 1 1.0000 3 2 2.5000 4 0 0 5 2 2.0000 6 1 1.0000 7 1 1.7500 8 1 1.0000 9 0 0.2500 10 0 0.5667 11 1 2.0787 12 0 0.8011 13 1 1.9838 14 1 1.0000 15 1 1.0000 16 2 2.5000 17 0 
0.8571 18 0 0 19 1 1.0000 20 0 0.2500 21 1 1.0000 22 2 2.0000 23 1 1.7500 24 1 1.8917 25 0 1.1879 26 0 0.8011 27 1 1.9861 28 1 2.0199 29 0 1.3620 30 0 0.6136 31 1 2.3677 32 0 1.1011 33 0 1.5258 34 0 1.7628 35 0 2.1658 */ SuperLU_DIST_5.3.0/SRC/dcomplex_dist.c0000644013363400111340000000365613233431301016277 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Defines common arithmetic operations for complex type * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include #include #include #include "dcomplex.h" /* Complex Division c = a/b */ void slud_z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) { double ratio, den; double abr, abi, cr, ci; if( (abr = b->r) < 0.) abr = - abr; if( (abi = b->i) < 0.) abi = - abi; if( abr <= abi ) { if (abi == 0) { fprintf(stderr, "slud_z_div.c: division by zero"); exit(-1); } ratio = b->r / b->i ; den = b->i * (1 + ratio*ratio); cr = (a->r*ratio + a->i) / den; ci = (a->i*ratio - a->r) / den; } else { ratio = b->i / b->r ; den = b->r * (1 + ratio*ratio); cr = (a->r + a->i*ratio) / den; ci = (a->i - a->r*ratio) / den; } c->r = cr; c->i = ci; } /* Returns sqrt(z.r^2 + z.i^2) */ double slud_z_abs(doublecomplex *z) { double temp; double real = z->r; double imag = z->i; if (real < 0) real = -real; if (imag < 0) imag = -imag; if (imag > real) { temp = real; real = imag; imag = temp; } if ((real+imag) == real) return(real); temp = imag/real; temp = real*sqrt(1.0 + temp*temp); /*overflow!!*/ return (temp); } /* Approximates the abs */ /* Returns abs(z.r) + abs(z.i) */ double slud_z_abs1(doublecomplex *z) { double real = z->r; double imag = z->i; if (real < 0) real = -real; if (imag < 0) imag = -imag; return (real + imag); } SuperLU_DIST_5.3.0/SRC/pdgstrs_Bglobal_Bsend.c0000644013363400111340000007254713233431301017671 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of distributed linear equations * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 * 
*/ #include "superlu_ddefs.h" /*#define ISEND_IRECV*/ /* Parry's change Use MPI_Bsend with a large buffer attached in the main program */ #define BSEND 1 /* * Function prototypes */ #ifdef _CRAY fortran void STRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, double*, double*, int*, double*, int*); fortran void SGEMM(_fcd, _fcd, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif /*! \brief * *
 * Purpose
 * =======
 *
 * pdgstrs_Bglobal solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by pdgstrf.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pdgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) double*
 *        On entry, the right-hand side matrix of the possibly equilibrated
 *        and row permuted system.
 *        On exit, the solution matrix of the possibly equilibrated
 *        and row permuted system if info = 0;
 *
 *        NOTE: Currently, the N-by-NRHS  matrix B must reside on all 
 *              processes when calling this routine.
 *
 * ldb    (input) int (global)
 *        Leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pdgstrs_Bglobal(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, double *B, int_t ldb, int nrhs, SuperLUStat_t *stat, int *info) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; double alpha = 1.0; double *lsum; /* Local running sum of the updates to B-components */ double *x; /* X component at step k. */ double *lusup, *dest; double *recvbuf, *tempv; double *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */ Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */ int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */ int_t iam, kcol, krow, mycol, myrow; int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int_t Pc, Pr; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; double **Lnzval_bc_ptr; MPI_Status status; #if defined(ISEND_IRECV) || defined(BSEND) MPI_Request *send_req, recv_req; int test_flag; #endif /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */ int_t nleaf = 0, nroot = 0; /*-- Counts used for U-solve --*/ int_t *bmod; /* Modification count for L-solve. */ int_t **bsendx_plist = Llu->bsendx_plist; int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */ int_t *brecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. 
*/ double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif /*-- Function prototypes --*/ extern void gather_diag_to_all(int_t, int_t, double [], Glu_persist_t *, LocalLU_t *, gridinfo_t *, int_t, int_t [], int_t [], double [], int_t, double []); t = SuperLU_timer_(); /* Test input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -9; if ( *info ) { pxerbla("PDGSTRS_BGLOBAL", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; #ifdef BSEND if(!iam) { printf("Using MPI_Bsend in triangular solve\n"); fflush(stdout); } #endif Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ stat->ops[SOLVE] = 0.0; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrs_Bglobal()"); #endif /* Save the count to be altered so it can be used by subsequent call to PDGSTRS_BGLOBAL. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; #if defined(ISEND_IRECV) || defined(BSEND) if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(Pr*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); for (i = 0; i < Pr; ++i) send_req[i] = MPI_REQUEST_NULL; #endif #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Obtain ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. 
*/ knsupc = sp_ienv_dist(3); maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H ); if ( !(lsum = doubleCalloc_dist(((size_t)ldalsum) * nrhs + nlb * LSUM_H))) ABORT("Calloc fails for lsum[]."); if ( !(x = doubleMalloc_dist(ldalsum * nrhs + nlb * XK_H)) ) ABORT("Malloc fails for x[]."); if ( !(recvbuf = doubleMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doubleMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. *---------------------------------------------------*/ /* * Copy B into X on the diagonal processes. */ ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H] = k; /* Block number prepended in the header. */ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* Diagonal process. */ jj = X_BLK( lk ); x[jj - XK_H] = k; /* Block number prepended in the header. */ RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) /* X is stored in blocks. */ x[i + jj + j*knsupc] = B[i + ii + j*ldb]; } } ii += knsupc; } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && fmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) frecv[%4d] %2d\n", iam, k, frecv[lk]); assert( frecv[lk] < Pc ); #endif } } } } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. 
--------------------------------------------------------- */ #if ( DEBUGlevel>=1 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( frecv[lk]==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs; --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[p]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req,stat); #ifdef ISEND_IRECV /* Wait for previous Isends to complete. 
*/ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) /*MPI_Wait( &send_req[p], &status );*/ MPI_Test( &send_req[p], &test_flag, &status ); } #endif } } /* if diagonal process ... */ } /* for k ... */ /* ----------------------------------------------------------- Compute the internal nodes asynchronously by all processes. ----------------------------------------------------------- */ #if ( DEBUGlevel>=1 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. */ #ifdef ISEND_IRECV /* -MPI- FATAL: Remote protocol queue full */ MPI_Irecv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &recv_req ); MPI_Wait( &recv_req, &status ); #else MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); #endif k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ dlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #else dtrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii-XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[p]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications. */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); #ifdef ISEND_IRECV /* Wait for the previous Isends to complete. */ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) MPI_Test( &send_req[p], &test_flag, &status ); } #endif } /* if */ break; #if ( DEBUGlevel>=1 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( PRNTlevel==2 ) if ( !iam ) printf("\n.. 
After L-solve: y =\n"); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); } MPI_Barrier( grid->comm ); } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); /* MPI_Barrier( grid->comm ); Drain messages in the forward solve. */ /*--------------------------------------------------- * Back solve Ux = y. * * The Y components from the forward solve is already * on the diagonal processes. *---------------------------------------------------*/ /* Save the count to be altered so it can be used by subsequent call to PDGSTRS_BGLOBAL. */ if ( !(bmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for bmod[]."); for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i]; if ( !(brecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for brecv[]."); Llu->brecv = brecv; /* * Compute brecv[] and nbrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } } /* Re-initialize lsum to zero. Each block header is already in place. 
*/ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { knsupc = SuperSize( k ); lk = LBi( k, grid ); il = LSUM_BLK( lk ); dest = &lsum[il]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = 0.0; } } /* Set up additional pointers for the index and value arrays of U. nlb is the number of local block rows. */ nub = CEILING( nsupers, Pc ); /* Number of local block columns. */ if ( !(Urbs = (int_t *) intCalloc_dist(2*nub)) ) ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero blocks in a block column. */ Urbs1 = Urbs + nub; if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) ) ABORT("Malloc fails for Ucb_indptr[]"); if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) ) ABORT("Malloc fails for Ucb_valptr[]"); /* Count number of row blocks in a block column. One pass of the skeleton graph of U. */ for (lk = 0; lk < nlb; ++lk) { usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ /* usub[0] -- number of column blocks in this block row. */ #if ( DEBUGlevel>=2 ) Ublocks += usub[0]; #endif i = BR_HEADER; /* Pointer in index array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number */ ++Urbs[LBj(k,grid)]; i += UB_DESCRIPTOR + SuperSize( k ); } } } /* Set up the vertical linked lists for the row blocks. One pass of the skeleton graph of U. */ for (lb = 0; lb < nub; ++lb) if ( Urbs[lb] ) { /* Not an empty block column. */ if ( !(Ucb_indptr[lb] = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) ) ABORT("Malloc fails for Ucb_indptr[lb][]"); if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) ) ABORT("Malloc fails for Ucb_valptr[lb][]"); } for (lk = 0; lk < nlb; ++lk) { /* For each block row. */ usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ i = BR_HEADER; /* Pointer in index array. */ j = 0; /* Pointer in nzval array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. 
*/ k = usub[i]; /* Global block number, column-wise. */ ljb = LBj( k, grid ); /* Local block number, column-wise. */ Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk; Ucb_indptr[ljb][Urbs1[ljb]].indpos = i; Ucb_valptr[ljb][Urbs1[ljb]] = j; ++Urbs1[ljb]; j += usub[i+1]; i += UB_DESCRIPTOR + SuperSize( k ); } } } #if ( DEBUGlevel>=2 ) for (p = 0; p < Pr*Pc; ++p) { if (iam == p) { printf("(%2d) .. Ublocks %d\n", iam, Ublocks); for (lb = 0; lb < nub; ++lb) { printf("(%2d) Local col %2d: # row blocks %2d\n", iam, lb, Urbs[lb]); if ( Urbs[lb] ) { for (i = 0; i < Urbs[lb]; ++i) printf("(%2d) .. row blk %2d:\ lbnum %d, indpos %d, valpos %d\n", iam, i, Ucb_indptr[lb][i].lbnum, Ucb_indptr[lb][i].indpos, Ucb_valptr[lb][i]); } } } MPI_Barrier( grid->comm ); } for (p = 0; p < Pr*Pc; ++p) { if ( iam == p ) { printf("\n(%d) bsendx_plist[][]", iam); for (lb = 0; lb < nub; ++lb) { printf("\n(%d) .. local col %2d: ", iam, lb); for (i = 0; i < Pr; ++i) printf("%4d", bsendx_plist[lb][i]); } printf("\n"); } MPI_Barrier( grid->comm ); } #endif /* DEBUGlevel */ #if ( PRNTlevel>=3 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Setup U-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif /* * Solve the roots first by all the diagonal processes. */ #if ( DEBUGlevel>=1 ) printf("(%2d) nroot %4d\n", iam, nroot); #endif for (k = nsupers-1; k >= 0 && nroot; --k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process. */ knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number, row-wise. */ if ( brecv[lk]==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. 
*/ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #else dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs; --nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[p]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications: lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); #ifdef ISEND_IRECV /* Wait for the previous Isends to complete. */ for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) /*MPI_Wait( &send_req[p], &status );*/ MPI_Test( &send_req[p], &test_flag, &status ); } #endif } /* if root ... */ } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. */ while ( nbrecvx || nbrecvmod ) { /* While not finished. */ /* Receive a message. 
*/ MPI_Recv( recvbuf, maxrecvsz, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = *recvbuf; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nbrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ dlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); break; case LSUM: --nbrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) x[i + ii + j*knsupc] += tempv[i + j*knsupc]; if ( (--brecv[lk])==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #else dtrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm, &send_req[p] ); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, MPI_DOUBLE, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii - XK_H], pi); #endif } /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) dlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); #ifdef ISEND_IRECV /* Wait for the previous Isends to complete. */ for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) /*MPI_Wait( &send_req[p], &status );*/ MPI_Test( &send_req[p], &test_flag, &status ); } #endif } /* if becomes solvable */ break; #if ( DEBUGlevel>=1 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=3 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. U-solve time\t%8.2f\n", t); #endif /* Copy the solution X into B (on all processes). */ { int_t num_diag_procs, *diag_procs, *diag_len; double *work; get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX(jj, diag_len[j]); if ( !(work = doubleMalloc_dist(jj*nrhs)) ) ABORT("Malloc fails for work[]"); gather_diag_to_all(n, nrhs, x, Glu_persist, Llu, grid, num_diag_procs, diag_procs, diag_len, B, ldb, work); SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); SUPERLU_FREE(work); } /* Deallocate storage. 
*/ SUPERLU_FREE(lsum); SUPERLU_FREE(x); SUPERLU_FREE(recvbuf); for (i = 0; i < nub; ++i) if ( Urbs[i] ) { SUPERLU_FREE(Ucb_indptr[i]); SUPERLU_FREE(Ucb_valptr[i]); } SUPERLU_FREE(Ucb_indptr); SUPERLU_FREE(Ucb_valptr); SUPERLU_FREE(Urbs); SUPERLU_FREE(bmod); SUPERLU_FREE(brecv); #ifdef ISEND_IRECV for (p = 0; p < Pr; ++p) { if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); } SUPERLU_FREE(send_req); #endif stat->utime[SOLVE] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrs_Bglobal()"); #endif /* Chao debug */ MPI_Barrier( grid->comm ); /* Drain messages in the forward solve. */ } /* PDGSTRS_BGLOBAL */ /*! \brief * *
 * Gather the components of x vector on the diagonal processes
 * onto all processes, and combine them into the global vector y.
 * 
*/ static void gather_diag_to_all(int_t n, int_t nrhs, double x[], Glu_persist_t *Glu_persist, LocalLU_t *Llu, gridinfo_t *grid, int_t num_diag_procs, int_t diag_procs[], int_t diag_len[], double y[], int_t ldy, double work[]) { int_t i, ii, j, k, lk, lwork, nsupers, p; int_t *ilsum, *xsup; int iam, knsupc, pkk; double *x_col, *y_col; iam = grid->iam; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; ilsum = Llu->ilsum; for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { /* Copy x vector into a buffer. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); /*ilsum[lk] + (lk+1)*XK_H;*/ x_col = &x[ii]; for (j = 0; j < nrhs; ++j) { for (i = 0; i < knsupc; ++i) work[i+lwork] = x_col[i]; lwork += knsupc; x_col += knsupc; } } MPI_Bcast( work, lwork, MPI_DOUBLE, pkk, grid->comm ); } else { MPI_Bcast( work, diag_len[p]*nrhs, MPI_DOUBLE, pkk, grid->comm ); } /* Scatter work[] into global y vector. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); ii = FstBlockC( k ); y_col = &y[ii]; for (j = 0; j < nrhs; ++j) { for (i = 0; i < knsupc; ++i) y_col[i] = work[i+lwork]; lwork += knsupc; y_col += ldy; } } } } /* GATHER_DIAG_TO_ALL */ SuperLU_DIST_5.3.0/SRC/superlu_zdefs.h0000644013363400111340000004271013233431301016332 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Distributed SuperLU data types and function prototypes * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * November 1, 2007
 * April 5, 2015
 * 
*/

#ifndef __SUPERLU_zDEFS /* allow multiple inclusions */
#define __SUPERLU_zDEFS

/*
 * File name:	superlu_zdefs.h
 * Purpose:     Distributed SuperLU data types and function prototypes
 *              (doublecomplex version)
 * History:
 */

#include "superlu_defs.h"
#include "dcomplex.h"

/*-- Auxiliary data type used in PxGSTRS/PxGSTRS1. */
typedef struct {
    int_t lbnum;  /* Row block number (local).      */
    int_t indpos; /* Starting position in Uindex[]. */
} Ucb_indptr_t;

/*
 * On each processor, the blocks in L are stored in compressed block
 * column format, the blocks in U are stored in compressed block row format.
 */
#define MAX_LOOKAHEADS 50 /* Length of the *_buf_2[] look-ahead buffer arrays. */
typedef struct {
    int_t   **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc)                 */
    doublecomplex **Lnzval_bc_ptr;  /* size ceil(NSUPERS/Pc)                 */
    int_t   **Ufstnz_br_ptr;  /* size ceil(NSUPERS/Pr)                 */
    doublecomplex  **Unzval_br_ptr;  /* size ceil(NSUPERS/Pr)                 */
#if 0
    int_t   *Lsub_buf;        /* Buffer for the remote subscripts of L */
    double  *Lval_buf;        /* Buffer for the remote nonzeros of L   */
    int_t   *Usub_buf;        /* Buffer for the remote subscripts of U */
    doublecomplex  *Uval_buf;        /* Buffer for the remote nonzeros of U   */
#endif
    int_t   *Lsub_buf_2[MAX_LOOKAHEADS];   /* Buffers for the remote subscripts of L */
    doublecomplex  *Lval_buf_2[MAX_LOOKAHEADS];   /* Buffers for the remote nonzeros of L   */
    int_t   *Usub_buf_2[MAX_LOOKAHEADS];   /* Buffers for the remote subscripts of U */
    doublecomplex  *Uval_buf_2[MAX_LOOKAHEADS];   /* Buffers for the remote nonzeros of U   */
    doublecomplex  *ujrow;           /* used in panel factorization.          */
    int_t   bufmax[NBUFFERS]; /* Maximum buffer size across all MPI ranks:
			       *  0 : maximum size of Lsub_buf[]
			       *  1 : maximum size of Lval_buf[]
			       *  2 : maximum size of Usub_buf[]
			       *  3 : maximum size of Uval_buf[]
			       *  4 : maximum size of tempv[LDA]
			       */

    /*-- Record communication schedule for factorization. --*/
    int   *ToRecv;          /* Recv from no one (0), left (1), and up (2).*/
    int   *ToSendD;         /* Whether need to send down block row.       */
    int   **ToSendR;        /* List of processes to send right block col. */

    /*-- Record communication schedule for forward/back solves. --*/
    int_t   *fmod;            /* Modification count for L-solve            */
    int_t   **fsendx_plist;   /* Column process list to send down Xk       */
    int_t   *frecv;           /* Modifications to be recv'd in proc row    */
    int_t   nfrecvx;          /* Number of Xk I will receive in L-solve    */
    int_t   nfsendx;          /* Number of Xk I will send in L-solve       */
    int_t   *bmod;            /* Modification count for U-solve            */
    int_t   **bsendx_plist;   /* Column process list to send down Xk       */
    int_t   *brecv;           /* Modifications to be recv'd in proc row    */
    int_t   nbrecvx;          /* Number of Xk I will receive in U-solve    */
    int_t   nbsendx;          /* Number of Xk I will send in U-solve       */
    int_t   *mod_bit;         /* Flag contribution from each row blocks    */

    /*-- Auxiliary arrays used for forward/back solves. --*/
    int_t   *ilsum;           /* Starting position of each supernode in lsum
				 (local)  */
    int_t   ldalsum;          /* LDA of lsum (local) */
    int_t   SolveMsgSent;     /* Number of actual messages sent in LU-solve */
    int_t   SolveMsgVol;      /* Volume of messages sent in the solve phase */


    /*********************/
    /* The following variables are used in the hybrid solver */

    /*-- Counts to be used in U^{-T} triangular solve. -- */
    int_t UT_SOLVE;
    int_t L_SOLVE;
    int_t FRECV;
    int_t ut_ldalsum;        /* LDA of lsum (local) */
    int_t *ut_ilsum;         /* ilsum in column-wise                        */
    int_t *utmod;            /* Modification count for Ut-solve.            */
    int_t **ut_sendx_plist;  /* Row process list to send down Xk            */
    int_t *utrecv;           /* Modifications to be recv'd in proc column.  */
    /* NOTE(review): the original comments on the next two fields read
       "receive" for n_utsendx and "send" for n_utrecvx; swapped here to
       follow the nfsendx/nfrecvx naming above -- confirm against the
       hybrid-solver code. */
    int_t n_utsendx;         /* Number of Xk I will send                    */
    int_t n_utrecvx;         /* Number of Xk I will receive                 */
    int_t n_utrecvmod;
    int_t nroot;
    int_t *ut_modbit;
    int_t *Urbs;
    Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
    int_t  **Ucb_valptr;      /* Vertical linked list pointing to Unzval[] */

    /* some additional counters for L solve */
    int_t n;
    int_t nleaf;
    int_t nfrecvmod;
} LocalLU_t;


/*-- Bundle of the elimination tree, the Glu_persist_t structure and the
     process-local L/U storage. */
typedef struct {
    int_t *etree;
    Glu_persist_t *Glu_persist;
    LocalLU_t *Llu;
} LUstruct_t;


/*-- Data structure for communication during matrix-vector multiplication. */
typedef struct {
    int_t *extern_start;
    int_t *ind_tosend;    /* X indices to be sent to other processes */
    int_t *ind_torecv;    /* X indices to be received from other processes */
    int_t *ptr_ind_tosend;/* Pointers to ind_tosend[] (Size procs)
			     (also point to val_torecv) */
    int_t *ptr_ind_torecv;/* Pointers to ind_torecv[] (Size procs)
			     (also point to val_tosend) */
    int   *SendCounts;    /* Numbers of X indices to be sent
			     (also numbers of X values to be received) */
    int   *RecvCounts;    /* Numbers of X indices to be received
			     (also numbers of X values to be sent) */
    doublecomplex *val_tosend;   /* X values to be sent to other processes */
    doublecomplex *val_torecv;   /* X values to be received from other processes */
    int_t TotalIndSend;   /* Total number of indices to be sent
			     (also total number of values to be received) */
    int_t TotalValSend;   /* Total number of values to be sent.
			     (also total number of indices to be received) */
} pzgsmv_comm_t;

/*-- Data structure holding the information for the solution phase --*/
typedef struct {
    int_t *row_to_proc;
    int_t *inv_perm_c;
    int_t num_diag_procs, *diag_procs, *diag_len;
    pzgsmv_comm_t *gsmv_comm; /* communication metadata for SpMV,
         	       		      required by IterRefine.          */
    pxgstrs_comm_t *gstrs_comm;  /* communication metadata for SpTRSV. */
    int_t *A_colind_gsmv; /* After pzgsmv_init(), the global column
                             indices of A are translated into the relative
                             positions in the gathered x-vector.
                             This is re-used in repeated calls to pzgsmv() */
    int_t *xrow_to_proc; /* used by PDSLin */
} SOLVEstruct_t;


/***********************************************************************
 * Function prototypes
 ***********************************************************************/

#ifdef __cplusplus
extern "C" {
#endif


/* Supernodal LU factor related */
extern void
zCreate_CompCol_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, doublecomplex *,
			    int_t *, int_t *, Stype_t, Dtype_t, Mtype_t);
extern void
zCreate_CompRowLoc_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, int_t,
			       int_t, doublecomplex *, int_t *, int_t *,
			       Stype_t, Dtype_t, Mtype_t);
extern void
zCompRow_to_CompCol_dist(int_t, int_t, int_t, doublecomplex *, int_t *, int_t *,
                         doublecomplex **, int_t **, int_t **);
extern int
pzCompRow_loc_to_CompCol_global(int_t, SuperMatrix *, gridinfo_t *,
	 		        SuperMatrix *);
extern void
zCopy_CompCol_Matrix_dist(SuperMatrix *, SuperMatrix *);
extern void
zCreate_Dense_Matrix_dist(SuperMatrix *, int_t, int_t, doublecomplex *, int_t,
			  Stype_t, Dtype_t, Mtype_t);
extern void
zCreate_SuperNode_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, doublecomplex *,
			      int_t *, int_t *, int_t *, int_t *, int_t *,
			      Stype_t, Dtype_t, Mtype_t);
extern void
zCopy_Dense_Matrix_dist(int_t, int_t, doublecomplex *, int_t,
                        doublecomplex *, int_t);

extern void    zallocateA_dist (int_t, int_t, doublecomplex **, int_t **, int_t **);
extern void    zGenXtrue_dist (int_t, int_t, doublecomplex *, int_t);
extern void    zFillRHS_dist (char *, int_t, doublecomplex *, int_t,
                              SuperMatrix *, doublecomplex *, int_t);
extern int     zcreate_matrix(SuperMatrix *, int, doublecomplex **, int *,
			      doublecomplex **, int *, FILE *, gridinfo_t *);
extern int     zcreate_matrix_rb(SuperMatrix *, int, doublecomplex **, int *,
			      doublecomplex **, int *, FILE *, gridinfo_t *);
extern int     zcreate_matrix_dat(SuperMatrix *, int, doublecomplex **, int *,
			      doublecomplex **, int *, FILE *, gridinfo_t *);

/* Driver related */
extern void    zgsequ_dist
(SuperMatrix *, double *, double *, double *, double *, double *, int_t *); extern double zlangs_dist (char *, SuperMatrix *); extern void zlaqgs_dist (SuperMatrix *, double *, double *, double, double, double, char *); extern void pzgsequ (SuperMatrix *, double *, double *, double *, double *, double *, int_t *, gridinfo_t *); extern double pzlangs (char *, SuperMatrix *, gridinfo_t *); extern void pzlaqgs (SuperMatrix *, double *, double *, double, double, double, char *); extern int pzPermute_Dense_Matrix(int_t, int_t, int_t [], int_t[], doublecomplex [], int, doublecomplex [], int, int, gridinfo_t *); extern int sp_ztrsv_dist (char *, char *, char *, SuperMatrix *, SuperMatrix *, doublecomplex *, int *); extern int sp_zgemv_dist (char *, doublecomplex, SuperMatrix *, doublecomplex *, int, doublecomplex, doublecomplex *, int); extern int sp_zgemm_dist (char *, int, doublecomplex, SuperMatrix *, doublecomplex *, int, doublecomplex, doublecomplex *, int); extern float zdistribute(fact_t, int_t, SuperMatrix *, Glu_freeable_t *, LUstruct_t *, gridinfo_t *); extern void pzgssvx_ABglobal(superlu_dist_options_t *, SuperMatrix *, ScalePermstruct_t *, doublecomplex *, int, int, gridinfo_t *, LUstruct_t *, double *, SuperLUStat_t *, int *); extern float pzdistribute(fact_t, int_t, SuperMatrix *, ScalePermstruct_t *, Glu_freeable_t *, LUstruct_t *, gridinfo_t *); extern void pzgssvx(superlu_dist_options_t *, SuperMatrix *, ScalePermstruct_t *, doublecomplex *, int, int, gridinfo_t *, LUstruct_t *, SOLVEstruct_t *, double *, SuperLUStat_t *, int *); extern int zSolveInit(superlu_dist_options_t *, SuperMatrix *, int_t [], int_t [], int_t, LUstruct_t *, gridinfo_t *, SOLVEstruct_t *); extern void zSolveFinalize(superlu_dist_options_t *, SOLVEstruct_t *); extern int_t pxgstrs_init(int_t, int_t, int_t, int_t, int_t [], int_t [], gridinfo_t *grid, Glu_persist_t *, SOLVEstruct_t *); extern void pxgstrs_finalize(pxgstrs_comm_t *); extern int zldperm_dist(int_t, int_t, int_t, int_t 
[], int_t [], doublecomplex [], int_t *, double [], double []); extern int static_schedule(superlu_dist_options_t *, int, int, LUstruct_t *, gridinfo_t *, SuperLUStat_t *, int_t *, int_t *, int *); extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); /* #define GPU_PROF #define IPM_PROF */ extern int_t pzgstrf(superlu_dist_options_t *, int, int, double, LUstruct_t*, gridinfo_t*, SuperLUStat_t*, int*); extern void pzgstrs_Bglobal(int_t, LUstruct_t *, gridinfo_t *, doublecomplex *, int_t, int, SuperLUStat_t *, int *); extern void pzgstrs(int_t, LUstruct_t *, ScalePermstruct_t *, gridinfo_t *, doublecomplex *, int_t, int_t, int_t, int, SOLVEstruct_t *, SuperLUStat_t *, int *); extern void zlsum_fmod(doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, int, int, int_t , int_t *, int_t, int_t, int_t, int_t *, gridinfo_t *, LocalLU_t *, MPI_Request [], SuperLUStat_t *); extern void zlsum_bmod(doublecomplex *, doublecomplex *, doublecomplex *, int, int_t, int_t *, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, LocalLU_t *, MPI_Request [], SuperLUStat_t *); extern void pzgsrfs(int_t, SuperMatrix *, double, LUstruct_t *, ScalePermstruct_t *, gridinfo_t *, doublecomplex [], int_t, doublecomplex [], int_t, int, SOLVEstruct_t *, double *, SuperLUStat_t *, int *); extern void pzgsrfs_ABXglobal(int_t, SuperMatrix *, double, LUstruct_t *, gridinfo_t *, doublecomplex *, int_t, doublecomplex *, int_t, int, double *, SuperLUStat_t *, int *); extern int pzgsmv_AXglobal_setup(SuperMatrix *, Glu_persist_t *, gridinfo_t *, int_t *, int_t *[], doublecomplex *[], int_t *[], int_t []); extern int pzgsmv_AXglobal(int_t, int_t [], doublecomplex [], int_t [], doublecomplex [], doublecomplex []); extern int pzgsmv_AXglobal_abs(int_t, int_t [], doublecomplex [], int_t [], doublecomplex [], double []); extern void pzgsmv_init(SuperMatrix *, int_t *, gridinfo_t *, 
pzgsmv_comm_t *); extern void pzgsmv(int_t, SuperMatrix *, gridinfo_t *, pzgsmv_comm_t *, doublecomplex x[], doublecomplex ax[]); extern void pzgsmv_finalize(pzgsmv_comm_t *); /* Memory-related */ extern doublecomplex *doublecomplexMalloc_dist(int_t); extern doublecomplex *doublecomplexCalloc_dist(int_t); extern double *doubleMalloc_dist(int_t); extern double *doubleCalloc_dist(int_t); extern void *duser_malloc_dist (int_t, int_t); extern void duser_free_dist (int_t, int_t); extern int_t zQuerySpace_dist(int_t, LUstruct_t *, gridinfo_t *, SuperLUStat_t *, superlu_dist_mem_usage_t *); /* Auxiliary routines */ extern void zfill_dist (doublecomplex *, int_t, doublecomplex); extern void zinf_norm_error_dist (int_t, int_t, doublecomplex*, int_t, doublecomplex*, int_t, gridinfo_t*); extern void pzinf_norm_error(int, int_t, int_t, doublecomplex [], int_t, doublecomplex [], int_t , gridinfo_t *); extern void zreadhb_dist (int, FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **); extern void zreadtriple_dist(FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **); extern void zreadrb_dist(int, FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **); extern void zreadMM_dist(FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **); /* Distribute the data for numerical factorization */ extern float zdist_psymbtonum(fact_t, int_t, SuperMatrix *, ScalePermstruct_t *, Pslu_freeable_t *, LUstruct_t *, gridinfo_t *); extern void pzGetDiagU(int_t, LUstruct_t *, gridinfo_t *, doublecomplex *); /* Routines for debugging */ extern void zPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, LocalLU_t *); extern void zPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *, LocalLU_t *); extern void zPrint_CompCol_Matrix_dist(SuperMatrix *); extern void zPrint_Dense_Matrix_dist(SuperMatrix *); extern int zPrint_CompRowLoc_Matrix_dist(SuperMatrix *); extern void PrintDoublecomplex(char *, int_t, doublecomplex *); 
extern int file_PrintDoublecomplex(FILE *fp, char *, int_t, doublecomplex *); /* BLAS */ #ifdef USE_VENDOR_BLAS extern void zgemm_(const char*, const char*, const int*, const int*, const int*, const doublecomplex*, const doublecomplex*, const int*, const doublecomplex*, const int*, const doublecomplex*, doublecomplex*, const int*, int, int); extern void ztrsv_(char*, char*, char*, int*, doublecomplex*, int*, doublecomplex*, int*, int, int, int); extern void ztrsm_(char*, char*, char*, char*, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*, int, int, int, int); extern void zgemv_(char *, int *, int *, doublecomplex *, doublecomplex *a, int *, doublecomplex *, int *, doublecomplex *, doublecomplex *, int *, int); extern void zgeru_(int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*, int*); #else extern int zgemm_(const char*, const char*, const int*, const int*, const int*, const doublecomplex*, const doublecomplex*, const int*, const doublecomplex*, const int*, const doublecomplex*, doublecomplex*, const int*); extern int ztrsv_(char*, char*, char*, int*, doublecomplex*, int*, doublecomplex*, int*); extern int ztrsm_(char*, char*, char*, char*, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*); extern int zgemv_(char *, int *, int *, doublecomplex *, doublecomplex *a, int *, doublecomplex *, int *, doublecomplex *, doublecomplex *, int *); extern int zgeru_(int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*, int*); #endif #ifdef __cplusplus } #endif #endif /* __SUPERLU_dDEFS */ SuperLU_DIST_5.3.0/SRC/dreadtriple_noheader.c0000644013363400111340000001105313233431301017573 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * */ #include #include "superlu_ddefs.h" #undef EXPAND_SYM /*! brief * *
 * Output parameters
 * =================
 *   (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of
 *      nonzeros in columns of matrix A; (*nzval)[*] the numerical values;
 *	column i of A is given by (*nzval)[k], k = (*colptr)[i],...,
 *      (*colptr)[i+1]-1.
 * 
*/ void dreadtriple_noheader(FILE *fp, int_t *m, int_t *n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { int_t i, j, k, jsize, lasta, nnz, nz, new_nonz, minn = 100; double *a, *val, vali; int_t *asub, *xa, *row, *col; int zero_base = 0, ret_val = 0; /* File format: Triplet in a line for each nonzero entry: * row col value * or row col real_part imaginary_part */ /* First pass: determine N and NNZ */ nz = *n = 0; #ifdef _LONGINT ret_val = fscanf(fp, "%ld%ld%lf%\n", &i, &j, &vali); #else ret_val = fscanf(fp, "%d%d%lf\n", &i, &j, &vali); #endif while (ret_val != EOF) { *n = SUPERLU_MAX(*n, i); *n = SUPERLU_MAX(*n, j); minn = SUPERLU_MIN(minn, i); minn = SUPERLU_MIN(minn, j); ++nz; #ifdef _LONGINT ret_val = fscanf(fp, "%ld%ld%lf%\n", &i, &j, &vali); #else ret_val = fscanf(fp, "%d%d%lf\n", &i, &j, &vali); #endif } if ( minn == 0 ) { /* zero-based indexing */ zero_base = 1; ++(*n); printf("triplet file: row/col indices are zero-based.\n"); } else { printf("triplet file: row/col indices are one-based.\n"); } *m = *n; *nonz = nz; rewind(fp); #ifdef EXPAND_SYM new_nonz = 2 * *nonz - *n; #else new_nonz = *nonz; #endif /* Second pass: read the actual matrix values */ printf("m %ld, n %ld, nonz %ld\n", *m, *n, *nonz); dallocateA_dist(*n, new_nonz, nzval, rowind, colptr); /* Allocate storage */ a = *nzval; asub = *rowind; xa = *colptr; if ( !(val = (double *) SUPERLU_MALLOC(new_nonz * sizeof(double))) ) ABORT("Malloc fails for val[]"); if ( !(row = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for row[]"); if ( !(col = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for col[]"); for (j = 0; j < *n; ++j) xa[j] = 0; /* Read into the triplet array from a file */ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { #ifdef _LONGINT fscanf(fp, "%ld%ld%lf\n", &row[nz], &col[nz], &val[nz]); #else fscanf(fp, "%d%d%lf\n", &row[nz], &col[nz], &val[nz]); #endif if ( !zero_base ) { /* Change to 0-based indexing. 
*/ --row[nz]; --col[nz]; } if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n /*|| val[nz] == 0.*/) { fprintf(stderr, "nz %d, (%d, %d) = %e out of bound, removed\n", nz, row[nz], col[nz], val[nz]); exit(-1); } else { ++xa[col[nz]]; #ifdef EXPAND_SYM if ( row[nz] != col[nz] ) { /* Excluding diagonal */ ++nz; row[nz] = col[nz-1]; col[nz] = row[nz-1]; val[nz] = val[nz-1]; ++xa[col[nz]]; } #endif ++nz; } } *nonz = nz; #ifdef EXPAND_SYM printf("new_nonz after symmetric expansion:\t%d\n", *nonz); #endif /* Initialize the array of column pointers */ k = 0; jsize = xa[0]; xa[0] = 0; for (j = 1; j < *n; ++j) { k += jsize; jsize = xa[j]; xa[j] = k; } /* Copy the triplets into the column oriented storage */ for (nz = 0; nz < *nonz; ++nz) { j = col[nz]; k = xa[j]; asub[k] = row[nz]; a[k] = val[nz]; ++xa[j]; } /* Reset the column pointers to the beginning of each column */ for (j = *n; j > 0; --j) xa[j] = xa[j-1]; xa[0] = 0; SUPERLU_FREE(val); SUPERLU_FREE(row); SUPERLU_FREE(col); #ifdef CHK_INPUT for (i = 0; i < *n; i++) { printf("Col %d, xa %d\n", i, xa[i]); for (k = xa[i]; k < xa[i+1]; k++) printf("%d\t%16.10f\n", asub[k], a[k]); } #endif } #if 0 void dreadrhs(int m, double *b) { FILE *fp, *fopen(); int i, j; if ( !(fp = fopen("b.dat", "r")) ) { fprintf(stderr, "zreadrhs: file does not exist\n"); exit(-1); } for (i = 0; i < m; ++i) fscanf(fp, "%lf\n", &b[i]); fclose(fp); } #endif SuperLU_DIST_5.3.0/SRC/etree.c0000644013363400111340000002413413233431301014537 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Elimination tree computation and layout routines * *
 *  Implementation of disjoint set union routines.
 *  Elements are integers in 0..n-1, and the 
 *  names of the sets themselves are of type int.
 *  
 *  Calls are:
 *  initialize_disjoint_sets (n) initial call.
 *  s = make_set (i)             returns a set containing only i.
 *  s = link (t, u)		 returns s = t union u, destroying t and u.
 *  s = find (i)		 return name of set containing i.
 *  finalize_disjoint_sets 	 final call.
 *
 *  This implementation uses path compression but not weighted union.
 *  See Tarjan's book for details.
 *  John Gilbert, CMI, 1987.
 *
 *  Implemented path-halving by XL 7/5/95.
 * 
*/ #include #include #include "superlu_ddefs.h" static int_t *mxCallocInt(int_t n) { register int_t i; int_t *buf; buf = (int_t *) SUPERLU_MALLOC( n * sizeof(int_t) ); if ( buf ) for (i = 0; i < n; i++) buf[i] = 0; return (buf); } static void initialize_disjoint_sets ( int_t n, int_t **pp /* parent array for sets */ ) { if ( !( (*pp) = mxCallocInt(n)) ) ABORT("mxCallocInit fails for pp[]"); } static int_t make_set ( int_t i, int_t *pp /* parent array for sets */ ) { pp[i] = i; return i; } static int_t link ( int_t s, int_t t, int_t *pp ) { pp[s] = t; return t; } /* PATH HALVING */ static int_t find ( int_t i, int_t *pp ) { register int_t p, gp; p = pp[i]; gp = pp[p]; while (gp != p) { pp[i] = gp; i = gp; p = pp[i]; gp = pp[p]; } return (p); } #if 0 /* PATH COMPRESSION */ static int_t find ( int_t i ) { if (pp[i] != i) pp[i] = find (pp[i]); return pp[i]; } #endif static void finalize_disjoint_sets ( int_t *pp ) { SUPERLU_FREE(pp); } /*! \brief Symmetric elimination tree * *
 *      p = spsymetree (A);
 *
 *      Find the elimination tree for symmetric matrix A.
 *      This uses Liu's algorithm, and runs in time O(nz*log n).
 *
 *      Input:
 *        Square sparse matrix A.  No check is made for symmetry;
 *        elements below and on the diagonal are ignored.
 *        Numeric values are ignored, so any explicit zeros are 
 *        treated as nonzero.
 *      Output:
 *        Integer array of parents representing the etree, with n
 *        meaning a root of the elimination forest.
 *      Note:  
 *        This routine uses only the upper triangle, while sparse
 *        Cholesky (as in spchol.c) uses only the lower.  Matlab's
 *        dense Cholesky uses only the upper.  This routine could
 *        be modified to use the lower triangle either by transposing
 *        the matrix or by traversing it by rows with auxiliary
 *        pointer and link arrays.
 *
 *      John R. Gilbert, Xerox, 10 Dec 1990
 *      Based on code by JRG dated 1987, 1988, and 1990.
 *      Modified by X.S. Li, November 1999.
 * 
*/
int
sp_symetree_dist(
                 int_t *acolst, int_t *acolend, /* column starts and ends past 1 */
                 int_t *arow,            /* row indices of A */
                 int_t n,                /* dimension of A */
                 int_t *parent           /* parent in elim tree */
                 )
{
    int_t *root;            /* root of subtee of etree */
    int_t rset, cset;
    int_t row, col;
    int_t rroot;
    int_t p;
    int_t *pp;              /* parent array of the disjoint-set forest */

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(0, "Enter sp_symetree()");
#endif

    /* root[] is indexed below, so fail loudly rather than dereference NULL
       (same policy as the other work arrays in this file). */
    root = mxCallocInt (n);
    if ( !root && n > 0 )
        ABORT("mxCallocInt fails for root[]");
    initialize_disjoint_sets (n, &pp);

    for (col = 0; col < n; col++) {
        cset = make_set (col, pp);
        root[cset] = col;
        parent[col] = n; /* Matlab */ /* n marks "no parent yet" (a root) */
        for (p = acolst[col]; p < acolend[col]; p++) {
            row = arow[p];
            /* Only entries strictly above the diagonal are used. */
            if (row >= col) continue;
            rset = find (row, pp);
            rroot = root[rset];
            if (rroot != col) {
                /* Attach the subtree containing row under col and merge
                   the two sets. */
                parent[rroot] = col;
                cset = link (cset, rset, pp);
                root[cset] = col;
            }
        }
    }
    SUPERLU_FREE (root);
    finalize_disjoint_sets (pp);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(0, "Exit sp_symetree()");
#endif
    return 0;
} /* SP_SYMETREE_DIST */

/*! \brief Nonsymmetric elimination tree * *
 *      Find the elimination tree for A'*A.
 *      This uses something similar to Liu's algorithm. 
 *      It runs in time O(nz(A)*log n) and does not form A'*A.
 *
 *      Input:
 *        Sparse matrix A.  Numeric values are ignored, so any
 *        explicit zeros are treated as nonzero.
 *      Output:
 *        Integer array of parents representing the elimination
 *        tree of the symbolic product A'*A.  Each vertex is a
 *        column of A, and nc means a root of the elimination forest.
 *
 *      John R. Gilbert, Xerox, 10 Dec 1990
 *      Based on code by JRG dated 1987, 1988, and 1990.
 * 
*/ int sp_coletree_dist( int_t *acolst, int_t *acolend, /* column start and end past 1 */ int_t *arow, /* row indices of A */ int_t nr, int_t nc, /* dimension of A */ int_t *parent /* parent in elim tree */ ) { int_t *root; /* root of subtee of etree */ int_t *firstcol; /* first nonzero col in each row*/ int_t rset, cset; int_t row, col; int_t rroot; int_t p; int_t *pp; #if ( DEBUGlevel>=1 ) int iam = 0; CHECK_MALLOC(iam, "Enter sp_coletree()"); #endif root = mxCallocInt (nc); initialize_disjoint_sets (nc, &pp); /* Compute firstcol[row] = first nonzero column in row */ firstcol = mxCallocInt (nr); for (row = 0; row < nr; firstcol[row++] = nc); for (col = 0; col < nc; col++) for (p = acolst[col]; p < acolend[col]; p++) { row = arow[p]; firstcol[row] = SUPERLU_MIN(firstcol[row], col); } /* Compute etree by Liu's algorithm for symmetric matrices, except use (firstcol[r],c) in place of an edge (r,c) of A. Thus each row clique in A'*A is replaced by a star centered at its first vertex, which has the same fill. */ for (col = 0; col < nc; col++) { cset = make_set (col, pp); root[cset] = col; parent[col] = nc; /* Matlab */ for (p = acolst[col]; p < acolend[col]; p++) { row = firstcol[arow[p]]; if (row >= col) continue; rset = find (row, pp); rroot = root[rset]; if (rroot != col) { parent[rroot] = col; cset = link (cset, rset, pp); root[cset] = col; } } } SUPERLU_FREE (root); SUPERLU_FREE (firstcol); finalize_disjoint_sets (pp); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit sp_coletree()"); #endif return 0; } /* SP_COLETREE_DIST */ /*! \brief Depth-first search from vertext * *
 *  q = TreePostorder_dist (n, p);
 *
 *	Postorder a tree.
 *	Input:
 *	  p is a vector of parent pointers for a forest whose
 *        vertices are the integers 0 to n-1; p[root]==n.
 *	Output:
 *	  q is a vector indexed by 0..n-1 such that q[i] is the
 *	  i-th vertex in a postorder numbering of the tree.
 *
 *        ( 2/7/95 modified by X.Li:
 *          q is a vector indexed by 0:n-1 such that vertex i is the
 *          q[i]-th vertex in a postorder numbering of the tree.
 *          That is, this is the inverse of the previous q. )
 *
 *	In the child structure, lower-numbered children are represented
 *	first, so that a tree which is already numbered in postorder
 *	will not have its order changed.
 *    
 *  Written by John Gilbert, Xerox, 10 Dec 1990.
 *  Based on code written by John Gilbert at CMI in 1987.
 * 
*/ static int_t *first_kid, *next_kid; /* Linked list of children. */ static int_t *post, postnum; static /* * Depth-first search from vertex v. */ void etdfs ( int_t v, int_t first_kid[], int_t next_kid[], int_t post[], int_t *postnum ) { int w; for (w = first_kid[v]; w != -1; w = next_kid[w]) { etdfs (w, first_kid, next_kid, post, postnum); } /* post[postnum++] = v; in Matlab */ post[v] = (*postnum)++; /* Modified by X. Li on 08/10/07 */ } static /* * Depth-first search from vertex n. * No recursion. */ void nr_etdfs (int_t n, int_t *parent, int_t *first_kid, int_t *next_kid, int_t *post, int_t postnum) { int_t current = n, first, next; while (postnum != n){ /* no kid for the current node */ first = first_kid[current]; /* no first kid for the current node */ if (first == -1){ /* numbering this node because it has no kid */ post[current] = postnum++; /* looking for the next kid */ next = next_kid[current]; while (next == -1){ /* no more kids : back to the parent node */ current = parent[current]; /* numbering the parent node */ post[current] = postnum++; /* get the next kid */ next = next_kid[current]; } /* stopping criterion */ if (postnum==n+1) return; /* updating current node */ current = next; } /* updating current node */ else { current = first; } } } /* * Post order a tree */ int_t *TreePostorder_dist( int_t n, int_t *parent ) { int_t v, dad; int_t *first_kid, *next_kid, *post, postnum; /* Allocate storage for working arrays and results */ if ( !(first_kid = mxCallocInt (n+1)) ) ABORT("mxCallocInt fails for first_kid[]"); if ( !(next_kid = mxCallocInt (n+1)) ) ABORT("mxCallocInt fails for next_kid[]"); if ( !(post = mxCallocInt (n+1)) ) ABORT("mxCallocInt fails for post[]"); /* Set up structure describing children */ for (v = 0; v <= n; first_kid[v++] = -1); for (v = n-1; v >= 0; v--) { dad = parent[v]; next_kid[v] = first_kid[dad]; first_kid[dad] = v; } /* Depth-first search from dummy root vertex #n */ postnum = 0; #if 0 /* recursion */ etdfs (n, 
first_kid, next_kid, post, &postnum); #else /* no recursion */ nr_etdfs(n, parent, first_kid, next_kid, post, postnum); #endif SUPERLU_FREE(first_kid); SUPERLU_FREE(next_kid); return post; } SuperLU_DIST_5.3.0/SRC/pdgsrfs_ABXglobal.c0000644013363400111340000003364713233431301016767 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Improves the computed solution and provies error bounds * *
 * -- Distributed SuperLU routine (version 4.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Last modified:
 * December 31, 2015  version 4.3
 * 
*/ #include #include "superlu_ddefs.h" /*-- Function prototypes --*/ static void gather_1rhs_diag_to_all(int_t, double [], Glu_persist_t *, LocalLU_t *, gridinfo_t *, int_t, int_t [], int_t [], double [], double []); static void redist_all_to_diag(int_t, double [], Glu_persist_t *, LocalLU_t *, gridinfo_t *, int_t [], double []); /*! \brief * *
 * Purpose
 * =======
 *
 * pdgsrfs_ABXglobal improves the computed solution to a system of linear   
 * equations and provides error bounds and backward error estimates
 * for the solution. 
 *
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * A      (input) SuperMatrix*
 *	  The original matrix A, or the scaled A if equilibration was done.
 *        A is also permuted into the form Pc*Pr*A*Pc', where Pr and Pc
 *        are permutation matrices. The type of A can be:
 *        Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
 *
 *        NOTE: Currently, A must reside in all processes when calling
 *              this routine.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pdgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_ddefs.h for the definition of 'LUstruct_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * B      (input) double* (global)
 *        The N-by-NRHS right-hand side matrix of the possibly equilibrated
 *        and row permuted system.
 *       
 *        NOTE: Currently, B must reside on all processes when calling
 *              this routine.
 *
 * ldb    (input) int (global)
 *        Leading dimension of matrix B.
 *
 * X      (input/output) double* (global)
 *        On entry, the solution matrix X, as computed by PDGSTRS.
 *        On exit, the improved solution matrix X.
 *        If DiagScale = COL or BOTH, X should be premultiplied by diag(C)
 *        in order to obtain the solution to the original system.
 *
 *        NOTE: Currently, X must reside on all processes when calling
 *              this routine.
 *
 * ldx    (input) int (global)
 *        Leading dimension of matrix X.
 *
 * nrhs   (input) int
 *        Number of right-hand sides.
 *
 * berr   (output) double*, dimension (nrhs)
 *         The componentwise relative backward error of each solution   
 *         vector X(j) (i.e., the smallest relative change in   
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the refinement steps.
 *        See util.h for the definition of SuperLUStat_t.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        
 * Internal Parameters   
 * ===================   
 *
 * ITMAX is the maximum number of steps of iterative refinement.   
 * 
*/ void pdgsrfs_ABXglobal(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, double *B, int_t ldb, double *X, int_t ldx, int nrhs, double *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 20 Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; /* * Data structures used by matrix-vector multiply routine. */ int_t N_update; /* Number of variables updated on this process */ int_t *update; /* vector elements (global index) updated on this processor. */ int_t *bindx; double *val; int_t *mv_sup_to_proc; /* Supernode to process mapping in matrix-vector multiply. */ /*-- end data structures for matrix-vector multiply --*/ double *b, *ax, *R, *B_col, *temp, *work, *X_col, *x_trs, *dx_trs; int_t count, ii, j, jj, k, knsupc, lk, lwork, nprow, nsupers, nz, p; int i, iam, pkk; int_t *ilsum, *xsup; double eps, lstres; double s, safmin, safe1, safe2; /* NEW STUFF */ int_t num_diag_procs, *diag_procs; /* Record diagonal process numbers. */ int_t *diag_len; /* Length of the X vector on diagonal processes. */ /*-- Function prototypes --*/ extern void pdgstrs1(int_t, LUstruct_t *, gridinfo_t *, double *, int, SuperLUStat_t *, int *); /* Test the input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NCP || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < SUPERLU_MAX(0, n) ) *info = -10; else if ( ldx < SUPERLU_MAX(0, n) ) *info = -12; else if ( nrhs < 0 ) *info = -13; if (*info != 0) { i = -(*info); pxerr_dist("pdgsrfs_ABXglobal", grid, i); return; } /* Quick return if possible. */ if ( n == 0 || nrhs == 0 ) { return; } /* Initialization. 
*/ iam = grid->iam; nprow = grid->nprow; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; ilsum = Llu->ilsum; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgsrfs_ABXglobal()"); #endif get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. number of diag processes = " IFMT "\n", num_diag_procs); PrintInt10("diag_procs", num_diag_procs, diag_procs); PrintInt10("diag_len", num_diag_procs, diag_len); } #endif if ( !(mv_sup_to_proc = intCalloc_dist(nsupers)) ) ABORT("Calloc fails for mv_sup_to_proc[]"); pdgsmv_AXglobal_setup(A, Glu_persist, grid, &N_update, &update, &val, &bindx, mv_sup_to_proc); i = CEILING( nsupers, nprow ); /* Number of local block rows */ ii = Llu->ldalsum + i * XK_H; k = SUPERLU_MAX(N_update, sp_ienv_dist(3)); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX( jj, diag_len[j] ); jj = SUPERLU_MAX( jj, N_update ); lwork = N_update /* For ax and R */ + ii /* For dx_trs */ + ii /* For x_trs */ + k /* For b */ + jj; /* for temp */ if ( !(work = doubleMalloc_dist(lwork)) ) ABORT("Malloc fails for work[]"); ax = R = work; dx_trs = work + N_update; x_trs = dx_trs + ii; b = x_trs + ii; temp = b + k; #if ( DEBUGlevel>=2 ) { double *dwork = doubleMalloc_dist(n); for (i = 0; i < n; ++i) { if ( i & 1 ) dwork[i] = 1.; else dwork[i] = 2.; } /* Check correctness of matrix-vector multiply. */ pdgsmv_AXglobal(N_update, update, val, bindx, dwork, ax); PrintDouble5("Mult A*x", N_update, ax); SUPERLU_FREE(dwork); } #endif /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = dmach_dist("Epsilon"); safmin = dmach_dist("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; #if ( DEBUGlevel>=1 ) if ( !iam ) printf(".. 
eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n", eps, anorm, safe1, safe2); #endif /* Do for each right-hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; /* Copy X into x on the diagonal processes. */ B_col = &B[j*ldb]; X_col = &X[j*ldx]; for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = ilsum[lk] + (lk+1)*XK_H; jj = FstBlockC( k ); for (i = 0; i < knsupc; ++i) x_trs[i+ii] = X_col[i+jj]; dx_trs[ii-XK_H] = k;/* Block number prepended in header. */ } } } /* Copy B into b distributed the same way as matrix-vector product. */ if ( N_update ) ii = update[0]; for (i = 0; i < N_update; ++i) b[i] = B_col[i + ii]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ /* Matrix-vector multiply. */ pdgsmv_AXglobal(N_update, update, val, bindx, X_col, ax); /* Compute residual. */ for (i = 0; i < N_update; ++i) R[i] = b[i] - ax[i]; /* Compute abs(op(A))*abs(X) + abs(B). */ pdgsmv_AXglobal_abs(N_update, update, val, bindx, X_col, temp); for (i = 0; i < N_update; ++i) temp[i] += fabs(b[i]); s = 0.0; for (i = 0; i < N_update; ++i) { if ( temp[i] > safe2 ) { s = SUPERLU_MAX(s, fabs(R[i]) / temp[i]); } else if ( temp[i] != 0.0 ) { /* Adding SAFE1 to the numerator guards against spuriously zero residuals (underflow). */ s = SUPERLU_MAX(s, (safe1 + fabs(R[i])) / temp[i]); } /* If temp[i] is exactly 0.0 (computed by PxGSMV), then we know the true residual also must be exactly 0.0. */ } MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm ); #if ( PRNTlevel>= 1 ) if ( !iam ) printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]); #endif if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) { /* Compute new dx. 
*/ redist_all_to_diag(n, R, Glu_persist, Llu, grid, mv_sup_to_proc, dx_trs); pdgstrs1(n, LUstruct, grid, dx_trs, 1, stat, info); /* Update solution. */ for (p = 0; p < num_diag_procs; ++p) if ( iam == diag_procs[p] ) for (k = p; k < nsupers; k += num_diag_procs) { lk = LBi( k, grid ); ii = ilsum[lk] + (lk+1)*XK_H; knsupc = SuperSize( k ); for (i = 0; i < knsupc; ++i) x_trs[i + ii] += dx_trs[i + ii]; } lstres = berr[j]; ++count; /* Transfer x_trs (on diagonal processes) into X (on all processes). */ gather_1rhs_diag_to_all(n, x_trs, Glu_persist, Llu, grid, num_diag_procs, diag_procs, diag_len, X_col, temp); } else { break; } } /* end while */ stat->RefineSteps = count; } /* for j ... */ /* Deallocate storage used by matrix-vector multiplication. */ SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); if ( N_update ) { SUPERLU_FREE(update); SUPERLU_FREE(bindx); SUPERLU_FREE(val); } SUPERLU_FREE(mv_sup_to_proc); SUPERLU_FREE(work); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgsrfs_ABXglobal()"); #endif } /* PDGSRFS_ABXGLOBAL */ /*! \brief * *
 * r[] is the residual vector distributed the same way as
 * matrix-vector product.
 * 
*/
static void
redist_all_to_diag(int_t n, double r[], Glu_persist_t *Glu_persist,
                   LocalLU_t *Llu, gridinfo_t *grid, int_t mv_sup_to_proc[],
                   double work[])
{
    /*
     * Walk over every supernode block in global order.  For each block,
     * the process recorded in mv_sup_to_proc[] hands its piece of r[] to
     * the process at grid position (PROW(blk), PCOL(blk)); the receiver
     * stores it in work[] at that block's offset.
     */
    /* xsup is not referenced by name below; presumably it is consumed by
       the SuperSize() macro -- TODO confirm against the header. */
    int_t *xsup = Glu_persist->xsup;
    int_t *ilsum = Llu->ilsum;
    int_t nsupers = Glu_persist->supno[n-1] + 1;
    int_t blk, lblk, dst, t;
    int_t src = 0; /* Read position in r[]; advances only past blocks
                      for which this process is the source. */
    int iam = grid->iam;
    int blk_size, owner, diag_pe;
    MPI_Status status;

    for (blk = 0; blk < nsupers; ++blk) {
        diag_pe = PNUM( PROW( blk, grid ), PCOL( blk, grid ), grid );
        owner = mv_sup_to_proc[blk];
        blk_size = SuperSize( blk );
        lblk = LBi( blk, grid );
        dst = ilsum[lblk] + (lblk+1)*XK_H; /* Offset of the block in work[]. */

        if ( iam != owner ) {
            /* Not the source of this block: at most we are its receiver. */
            if ( iam == diag_pe ) {
                MPI_Recv( &work[dst], blk_size, MPI_DOUBLE, owner, Xk,
                          grid->comm, &status );
            }
            continue;
        }

        /* This process holds the block's piece of r[]. */
        if ( iam == diag_pe ) {
            /* Source and destination coincide: plain local copy. */
            for (t = 0; t < blk_size; ++t)
                work[dst + t] = r[src + t];
        } else {
            MPI_Send( &r[src], blk_size, MPI_DOUBLE, diag_pe, Xk,
                      grid->comm );
        }
        src += blk_size;
    }
} /* REDIST_ALL_TO_DIAG */

/*! \brief
 *
 *
 * Gather the components of x vector on the diagonal processes
 * onto all processes, and combine them into the global vector y.
 * 
*/
static void
gather_1rhs_diag_to_all(int_t n, double x[], Glu_persist_t *Glu_persist,
                        LocalLU_t *Llu, gridinfo_t *grid,
                        int_t num_diag_procs, int_t diag_procs[],
                        int_t diag_len[], double y[], double work[])
{
    /*
     * One broadcast round per entry of diag_procs[].  In round p the
     * process diag_procs[p] packs its blocks p, p+num_diag_procs, ...
     * of x[] contiguously into work[] and broadcasts them; afterwards
     * every process unpacks work[] into y[] at the blocks' global
     * positions.
     */
    int_t *xsup = Glu_persist->xsup;  /* Used by FstBlockC(); presumably
                                         also by SuperSize() -- TODO confirm. */
    int_t *ilsum = Llu->ilsum;
    int_t nsupers = Glu_persist->supno[n-1] + 1;
    int_t p, blk, lblk, off, filled, bcast_len, t;
    int iam = grid->iam;
    int blk_size, root;

    for (p = 0; p < num_diag_procs; ++p) {
        root = diag_procs[p];

        if ( iam == root ) {
            /* Pack this process's blocks of x into the send buffer. */
            filled = 0;
            for (blk = p; blk < nsupers; blk += num_diag_procs) {
                blk_size = SuperSize( blk );
                lblk = LBi( blk, grid );
                off = ilsum[lblk] + (lblk+1)*XK_H;
                for (t = 0; t < blk_size; ++t)
                    work[filled + t] = x[off + t];
                filled += blk_size;
            }
            bcast_len = filled;
        } else {
            /* Receivers take the message length from diag_len[]. */
            bcast_len = diag_len[p];
        }
        MPI_Bcast( work, bcast_len, MPI_DOUBLE, root, grid->comm );

        /* Unpack work[] into the global y vector. */
        filled = 0;
        for (blk = p; blk < nsupers; blk += num_diag_procs) {
            blk_size = SuperSize( blk );
            off = FstBlockC( blk );
            for (t = 0; t < blk_size; ++t)
                y[off + t] = work[filled + t];
            filled += blk_size;
        }
    }
} /* GATHER_1RHS_DIAG_TO_ALL */
SuperLU_DIST_5.3.0/SRC/pdgstrf_sherry.c0000644013363400111340000012327013233431301016501 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /* * -- Distributed SuperLU routine (version 1.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley.
* September 1, 1999 * * Modified: * Feburary 7, 2001 use MPI_Isend/MPI_Irecv */ #include #include "superlu_ddefs.h" #if ( VAMPIR>=1 ) #include #endif /* * Internal prototypes */ static void pdgstrf2(superlu_options_t *, int_t, double, Glu_persist_t *, gridinfo_t *, LocalLU_t *, MPI_Request *, SuperLUStat_t *, int *); #ifdef _CRAY static void pdgstrs2(int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, _fcd, _fcd, _fcd); #else static void pdgstrs2(int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *); #endif /* * Sketch of the algorithm * ======================= * * The following relations hold: * * A_kk = L_kk * U_kk * * L_ik = Aik * U_kk^(-1) * * U_kj = L_kk^(-1) * A_kj * * ---------------------------------- * | | | * ----|----------------------------- * | | \ U_kk| | * | | \ | U_kj | * | |L_kk \ | || | * ----|-------|---------||---------- * | | | \/ | * | | | | * | | | | * | | | | * | | L_ik ==> A_ij | * | | | | * | | | | * | | | | * ---------------------------------- * * Handle the first block of columns separately. * * Factor diagonal and subdiagonal blocks and test for exact * singularity. ( pdgstrf2(0), one column at a time ) * * Compute block row of U * * Update trailing matrix * * Loop over the remaining blocks of columns. * mycol = MYCOL( iam, grid ); * myrow = MYROW( iam, grid ); * N = nsupers; * For (k = 1; k < N; ++k) { * krow = PROW( k, grid ); * kcol = PCOL( k, grid ); * Pkk = PNUM( krow, kcol, grid ); * * * Factor diagonal and subdiagonal blocks and test for exact * singularity. 
* if ( mycol == kcol ) { * pdgstrf2(k), one column at a time * } * * * Parallel triangular solve * if ( iam == Pkk ) multicast L_k,k to this process row; * if ( myrow == krow && mycol != kcol ) { * Recv L_k,k from process Pkk; * for (j = k+1; j < N; ++j) * if ( PCOL( j, grid ) == mycol && A_k,j != 0 ) * U_k,j = L_k,k \ A_k,j; * } * * * Parallel rank-k update * if ( myrow == krow ) multicast U_k,k+1:N to this process column; * if ( mycol == kcol ) multicast L_k+1:N,k to this process row; * if ( myrow != krow ) { * Pkj = PNUM( krow, mycol, grid ); * Recv U_k,k+1:N from process Pkj; * } * if ( mycol != kcol ) { * Pik = PNUM( myrow, kcol, grid ); * Recv L_k+1:N,k from process Pik; * } * for (j = k+1; k < N; ++k) { * for (i = k+1; i < N; ++i) * if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid ) * && L_i,k != 0 && U_k,j != 0 ) * A_i,j = A_i,j - L_i,k * U_k,j; * } * } * * * Remaining issues * (1) Use local indices for L subscripts and SPA. [DONE] * */ /************************************************************************/ int_t pdgstrf /************************************************************************/ ( superlu_options_t *options, int m, int n, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, int *info ) /* * Purpose * ======= * * PDGSTRF performs the LU factorization in parallel. * * Arguments * ========= * * options (input) superlu_options_t* * The structure defines the input parameters to control * how the LU decomposition will be performed. * The following field should be defined: * o ReplaceTinyPivot (yes_no_t) * Specifies whether to replace the tiny diagonals by * sqrt(epsilon)*norm(A) during LU factorization. * * m (input) int * Number of rows in the matrix. * * n (input) int * Number of columns in the matrix. * * anorm (input) double * The norm of the original matrix A, or the scaled A if * equilibration was done. * * LUstruct (input/output) LUstruct_t* * The data structures to store the distributed L and U factors. 
* The following fields should be defined: * * o Glu_persist (input) Glu_persist_t* * Global data structure (xsup, supno) replicated on all processes, * describing the supernode partition in the factored matrices * L and U: * xsup[s] is the leading column of the s-th supernode, * supno[i] is the supernode number to which column i belongs. * * o Llu (input/output) LocalLU_t* * The distributed data structures to store L and U factors. * See superlu_ddefs.h for the definition of 'LocalLU_t'. * * grid (input) gridinfo_t* * The 2D process mesh. It contains the MPI communicator, the number * of process rows (NPROW), the number of process columns (NPCOL), * and my process rank. It is an input argument to all the * parallel routines. * Grid can be initialized by subroutine SUPERLU_GRIDINIT. * See superlu_ddefs.h for the definition of 'gridinfo_t'. * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * and division by zero will occur if it is used to solve a * system of equations. * */ { #ifdef _CRAY _fcd ftcs = _cptofcd("N", strlen("N")); _fcd ftcs1 = _cptofcd("L", strlen("L")); _fcd ftcs2 = _cptofcd("N", strlen("N")); _fcd ftcs3 = _cptofcd("U", strlen("U")); #endif double alpha = 1.0, beta = 0.0; int_t *xsup; int_t *lsub, *lsub1, *usub, *Usub_buf, *Lsub_buf_2[2]; /* Need 2 buffers to implement Irecv. */ double *lusup, *lusup1, *uval, *Uval_buf, *Lval_buf_2[2]; /* Need 2 buffers to implement Irecv. 
*/ int_t fnz, i, ib, ijb, ilst, it, iukp, jb, jj, klst, knsupc, lb, lib, ldv, ljb, lptr, lptr0, lptrj, luptr, luptr0, luptrj, nlb, nub, nsupc, rel, rukp; int_t Pc, Pr; int iam, kcol, krow, mycol, myrow, pi, pj; int j, k, lk, nsupers; int nsupr, nbrow, segsize; int msgcnt[4]; /* Count the size of the message xfer'd in each buffer: * 0 : transferred in Lsub_buf[] * 1 : transferred in Lval_buf[] * 2 : transferred in Usub_buf[] * 3 : transferred in Uval_buf[] */ int_t msg0, msg2; int_t **Ufstnz_br_ptr, **Lrowind_bc_ptr; double **Unzval_br_ptr, **Lnzval_bc_ptr; int_t *index; double *nzval; int_t *iuip, *ruip;/* Pointers to U index/nzval; size ceil(NSUPERS/Pr). */ double *ucol; int_t *indirect; double *tempv, *tempv2d; int_t iinfo; int_t *ToRecv, *ToSendD, **ToSendR; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; superlu_scope_t *scp; float s_eps; double thresh; double *tempU2d, *tempu; int full, ldt, ldu, lead_zero, ncols; MPI_Request recv_req[4], *send_req, *U_diag_blk_send_req = NULL; MPI_Status status; #if ( DEBUGlevel>=2 ) int_t num_copy=0, num_update=0; #endif #if ( PRNTlevel==3 ) int_t zero_msg = 0, total_msg = 0; #endif #if ( PROFlevel>=1 ) double t1, t2; float msg_vol = 0, msg_cnt = 0; int_t iword = sizeof(int_t), dword = sizeof(double); #endif /* Test the input parameters. */ *info = 0; if ( m < 0 ) *info = -2; else if ( n < 0 ) *info = -3; if ( *info ) { pxerbla("pdgstrf", grid, -*info); return (-1); } /* Quick return if possible. */ if ( m == 0 || n == 0 ) return 0; /* * Initialization. 
*/ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; s_eps = slamch_("Epsilon"); thresh = s_eps * anorm; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrf()"); #endif stat->ops[FACT] = 0.0; if ( Pr*Pc > 1 ) { i = Llu->bufmax[0]; if ( !(Llu->Lsub_buf_2[0] = intMalloc_dist(2 * ((size_t)i))) ) ABORT("Malloc fails for Lsub_buf."); Llu->Lsub_buf_2[1] = Llu->Lsub_buf_2[0] + i; i = Llu->bufmax[1]; if ( !(Llu->Lval_buf_2[0] = doubleMalloc_dist(2 * ((size_t)i))) ) ABORT("Malloc fails for Lval_buf[]."); Llu->Lval_buf_2[1] = Llu->Lval_buf_2[0] + i; if ( Llu->bufmax[2] != 0 ) if ( !(Llu->Usub_buf = intMalloc_dist(Llu->bufmax[2])) ) ABORT("Malloc fails for Usub_buf[]."); if ( Llu->bufmax[3] != 0 ) if ( !(Llu->Uval_buf = doubleMalloc_dist(Llu->bufmax[3])) ) ABORT("Malloc fails for Uval_buf[]."); if ( !(U_diag_blk_send_req = (MPI_Request *) SUPERLU_MALLOC(Pr*sizeof(MPI_Request)))) ABORT("Malloc fails for U_diag_blk_send_req[]."); U_diag_blk_send_req[myrow] = 0; /* flag no outstanding Isend */ if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*Pc*sizeof(MPI_Request)))) ABORT("Malloc fails for send_req[]."); } k = sp_ienv_dist(3); /* max supernode size */ if ( !(Llu->ujrow = doubleMalloc_dist(k*(k+1)/2)) ) ABORT("Malloc fails for ujrow[]."); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. thresh = s_eps %e * anorm %e = %e\n", s_eps, anorm, thresh); printf(".. 
Buffer size: Lsub %d\tLval %d\tUsub %d\tUval %d\tLDA %d\n", Llu->bufmax[0], Llu->bufmax[1], Llu->bufmax[2], Llu->bufmax[3], Llu->bufmax[4]); } #endif Lsub_buf_2[0] = Llu->Lsub_buf_2[0]; Lsub_buf_2[1] = Llu->Lsub_buf_2[1]; Lval_buf_2[0] = Llu->Lval_buf_2[0]; Lval_buf_2[1] = Llu->Lval_buf_2[1]; Usub_buf = Llu->Usub_buf; Uval_buf = Llu->Uval_buf; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; ToRecv = Llu->ToRecv; ToSendD = Llu->ToSendD; ToSendR = Llu->ToSendR; ldt = sp_ienv_dist(3); /* Size of maximum supernode */ if ( !(tempv2d = doubleCalloc_dist(2*((size_t)ldt)*ldt)) ) ABORT("Calloc fails for tempv2d[]."); tempU2d = tempv2d + ldt*ldt; if ( !(indirect = intMalloc_dist(ldt)) ) ABORT("Malloc fails for indirect[]."); k = CEILING( nsupers, Pr ); /* Number of local block rows */ if ( !(iuip = intMalloc_dist(k)) ) ABORT("Malloc fails for iuip[]."); if ( !(ruip = intMalloc_dist(k)) ) ABORT("Malloc fails for ruip[]."); #if ( VAMPIR>=1 ) VT_symdef(1, "Send-L", "Comm"); VT_symdef(2, "Recv-L", "Comm"); VT_symdef(3, "Send-U", "Comm"); VT_symdef(4, "Recv-U", "Comm"); VT_symdef(5, "TRF2", "Factor"); VT_symdef(100, "Factor", "Factor"); VT_begin(100); VT_traceon(); #endif /* --------------------------------------------------------------- Handle the first block column separately to start the pipeline. --------------------------------------------------------------- */ if ( mycol == 0 ) { #if ( VAMPIR>=1 ) VT_begin(5); #endif pdgstrf2(options, 0, thresh, Glu_persist, grid, Llu, U_diag_blk_send_req, stat, info); #if ( VAMPIR>=1 ) VT_end(5); #endif scp = &grid->rscp; /* The scope of process row. */ /* Process column *kcol* multicasts numeric values of L(:,k) to process rows. 
*/ lsub = Lrowind_bc_ptr[0]; lusup = Lnzval_bc_ptr[0]; if ( lsub ) { msgcnt[0] = lsub[1] + BC_HEADER + lsub[0]*LB_DESCRIPTOR; msgcnt[1] = lsub[1] * SuperSize( 0 ); } else { msgcnt[0] = msgcnt[1] = 0; } for (pj = 0; pj < Pc; ++pj) { if ( ToSendR[0][pj] != EMPTY ) { #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(1); #endif MPI_Isend( lsub, msgcnt[0], mpi_int_t, pj, 0, scp->comm, &send_req[pj] ); MPI_Isend( lusup, msgcnt[1], MPI_DOUBLE, pj, 1, scp->comm, &send_req[pj+Pc] ); #if ( DEBUGlevel>=2 ) printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, 0, msgcnt[0], msgcnt[1], pj); #endif #if ( VAMPIR>=1 ) VT_end(1); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[0]*iword + msgcnt[1]*dword; #endif } } /* for pj ... */ } else { /* Post immediate receives. */ if ( ToRecv[0] >= 1 ) { /* Recv block column L(:,0). */ scp = &grid->rscp; /* The scope of process row. */ MPI_Irecv( Lsub_buf_2[0], Llu->bufmax[0], mpi_int_t, 0, 0, scp->comm, &recv_req[0] ); MPI_Irecv( Lval_buf_2[0], Llu->bufmax[1], MPI_DOUBLE, 0, 1, scp->comm, &recv_req[1] ); #if ( DEBUGlevel>=2 ) printf("(%d) Post Irecv L(:,%4d)\n", iam, 0); #endif } } /* if mycol == 0 */ /* ------------------------------------------ MAIN LOOP: Loop through all block columns. ------------------------------------------ */ for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( mycol == kcol ) { lk = LBj( k, grid ); /* Local block number. */ for (pj = 0; pj < Pc; ++pj) { /* Wait for Isend to complete before using lsub/lusup. */ if ( ToSendR[lk][pj] != EMPTY ) { MPI_Wait( &send_req[pj], &status ); MPI_Wait( &send_req[pj+Pc], &status ); } } lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; } else { if ( ToRecv[k] >= 1 ) { /* Recv block column L(:,k). */ scp = &grid->rscp; /* The scope of process row. 
*/ #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(2); #endif /*probe_recv(iam, kcol, (4*k)%NTAGS, mpi_int_t, scp->comm, Llu->bufmax[0]);*/ /*MPI_Recv( Lsub_buf, Llu->bufmax[0], mpi_int_t, kcol, (4*k)%NTAGS, scp->comm, &status );*/ MPI_Wait( &recv_req[0], &status ); MPI_Get_count( &status, mpi_int_t, &msgcnt[0] ); /*probe_recv(iam, kcol, (4*k+1)%NTAGS, MPI_DOUBLE, scp->comm, Llu->bufmax[1]);*/ /*MPI_Recv( Lval_buf, Llu->bufmax[1], MPI_DOUBLE, kcol, (4*k+1)%NTAGS, scp->comm, &status );*/ MPI_Wait( &recv_req[1], &status ); MPI_Get_count( &status, MPI_DOUBLE, &msgcnt[1] ); #if ( VAMPIR>=1 ) VT_end(2); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Recv L(:,%4d): lsub %4d, lusup %4d from Pc %2d\n", iam, k, msgcnt[0], msgcnt[1], kcol); fflush(stdout); #endif lsub = Lsub_buf_2[k%2]; lusup = Lval_buf_2[k%2]; #if ( PRNTlevel==3 ) ++total_msg; if ( !msgcnt[0] ) ++zero_msg; #endif } else msgcnt[0] = 0; } /* if mycol = Pc(k) */ scp = &grid->cscp; /* The scope of process column. */ if ( myrow == krow ) { /* Parallel triangular solve across process row *krow* -- U(k,j) = L(k,k) \ A(k,j). */ #ifdef _CRAY pdgstrs2(n, k, Glu_persist, grid, Llu, stat, ftcs1, ftcs2, ftcs3); #else pdgstrs2(n, k, Glu_persist, grid, Llu, stat); #endif /* Multicasts U(k,:) to process columns. 
*/ lk = LBi( k, grid ); usub = Ufstnz_br_ptr[lk]; uval = Unzval_br_ptr[lk]; if ( usub ) { msgcnt[2] = usub[2]; msgcnt[3] = usub[1]; } else { msgcnt[2] = msgcnt[3] = 0; } if ( ToSendD[lk] == YES ) { for (pi = 0; pi < Pr; ++pi) { if ( pi != myrow ) { #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(3); #endif MPI_Send( usub, msgcnt[2], mpi_int_t, pi, (4*k+2)%NTAGS, scp->comm); MPI_Send( uval, msgcnt[3], MPI_DOUBLE, pi, (4*k+3)%NTAGS, scp->comm); #if ( VAMPIR>=1 ) VT_end(3); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[2]*iword + msgcnt[3]*dword; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Send U(%4d,:) to Pr %2d\n", iam, k, pi); #endif } /* if pi ... */ } /* for pi ... */ } /* if ToSendD ... */ } else { /* myrow != krow */ if ( ToRecv[k] == 2 ) { /* Recv block row U(k,:). */ #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(4); #endif /*probe_recv(iam, krow, (4*k+2)%NTAGS, mpi_int_t, scp->comm, Llu->bufmax[2]);*/ MPI_Recv( Usub_buf, Llu->bufmax[2], mpi_int_t, krow, (4*k+2)%NTAGS, scp->comm, &status ); MPI_Get_count( &status, mpi_int_t, &msgcnt[2] ); /*probe_recv(iam, krow, (4*k+3)%NTAGS, MPI_DOUBLE, scp->comm, Llu->bufmax[3]);*/ MPI_Recv( Uval_buf, Llu->bufmax[3], MPI_DOUBLE, krow, (4*k+3)%NTAGS, scp->comm, &status ); MPI_Get_count( &status, MPI_DOUBLE, &msgcnt[3] ); #if ( VAMPIR>=1 ) VT_end(4); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; #endif usub = Usub_buf; uval = Uval_buf; #if ( DEBUGlevel>=2 ) printf("(%d) Recv U(%4d,:) from Pr %2d\n", iam, k, krow); #endif #if ( PRNTlevel==3 ) ++total_msg; if ( !msgcnt[2] ) ++zero_msg; #endif } else msgcnt[2] = 0; } /* if myrow == Pr(k) */ /* * Parallel rank-k update; pair up blocks L(i,k) and U(k,j). 
* for (j = k+1; k < N; ++k) { * for (i = k+1; i < N; ++i) * if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid ) * && L(i,k) != 0 && U(k,j) != 0 ) * A(i,j) = A(i,j) - L(i,k) * U(k,j); */ msg0 = msgcnt[0]; msg2 = msgcnt[2]; if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */ nsupr = lsub[1]; /* LDA of lusup. */ if ( myrow == krow ) { /* Skip diagonal block L(k,k). */ lptr0 = BC_HEADER + LB_DESCRIPTOR + lsub[BC_HEADER+1]; luptr0 = knsupc; nlb = lsub[0] - 1; } else { lptr0 = BC_HEADER; luptr0 = 0; nlb = lsub[0]; } lptr = lptr0; for (lb = 0; lb < nlb; ++lb) { /* Initialize block row pointers. */ ib = lsub[lptr]; lib = LBi( ib, grid ); iuip[lib] = BR_HEADER; ruip[lib] = 0; lptr += LB_DESCRIPTOR + lsub[lptr+1]; } nub = usub[0]; /* Number of blocks in the block row U(k,:) */ iukp = BR_HEADER; /* Skip header; Pointer to index[] of U(k,:) */ rukp = 0; /* Pointer to nzval[] of U(k,:) */ klst = FstBlockC( k+1 ); /* --------------------------------------------------- Update the first block column A(:,k+1). --------------------------------------------------- */ jb = usub[iukp]; /* Global block number of block U(k,j). */ if ( jb == k+1 ) { /* First update (k+1)-th block. */ --nub; lptr = lptr0; luptr = luptr0; ljb = LBj( jb, grid ); /* Local block number of U(k,j). */ nsupc = SuperSize( jb ); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ /* Prepare to call DGEMM. */ jj = iukp; while ( usub[jj] == klst ) ++jj; ldu = klst - usub[jj++]; ncols = 1; full = 1; for (; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { ++ncols; if ( segsize != ldu ) full = 0; if ( segsize > ldu ) ldu = segsize; } } #if ( DEBUGlevel>=3 ) ++num_update; #endif if ( full ) { tempu = &uval[rukp]; } else { /* Copy block U(k,j) into tempU2d. 
*/ #if ( DEBUGlevel>=3 ) printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n", iam, full, k, jb, ldu, ncols, nsupc); ++num_copy; #endif tempu = tempU2d; for (jj = iukp; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { lead_zero = ldu - segsize; for (i = 0; i < lead_zero; ++i) tempu[i] = 0.0; tempu += lead_zero; for (i = 0; i < segsize; ++i) tempu[i] = uval[rukp+i]; rukp += segsize; tempu += segsize; } } tempu = tempU2d; rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */ } /* if full ... */ for (lb = 0; lb < nlb; ++lb) { ib = lsub[lptr]; /* Row block L(i,k). */ nbrow = lsub[lptr+1]; /* Number of full rows. */ lptr += LB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; #ifdef _CRAY SGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #elif defined (USE_VENDOR_BLAS) dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt, 1, 1); #else dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #endif stat->ops[FACT] += 2 * nbrow * ldu * ncols; /* Now gather the result into the destination block. */ if ( ib < jb ) { /* A(i,j) is in U. */ ilst = FstBlockC( ib+1 ); lib = LBi( ib, grid ); index = Ufstnz_br_ptr[lib]; ijb = index[iuip[lib]]; while ( ijb < jb ) { /* Search for dest block. */ ruip[lib] += index[iuip[lib]+1]; iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb ); ijb = index[iuip[lib]]; } iuip[lib] += UB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0, it = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; ucol[rel] -= tempv[it++]; } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. 
*/ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! */ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted. */ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) { /*#pragma _CRI cache_bypass nzval,tempv*/ for (it = 0, i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; nzval[indirect[rel]] -= tempv[it++]; } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* if jb == k+1 */ } /* if L(:,k) and U(k,:) not empty */ if ( k+1 < nsupers ) { kcol = PCOL( k+1, grid ); if ( mycol == kcol ) { #if ( VAMPIR>=1 ) VT_begin(5); #endif /* Factor diagonal and subdiagonal blocks and test for exact singularity. */ pdgstrf2(options, k+1, thresh, Glu_persist, grid, Llu, U_diag_blk_send_req, stat, info); #if ( VAMPIR>=1 ) VT_end(5); #endif /* Process column *kcol+1* multicasts numeric values of L(:,k+1) to process rows. */ lk = LBj( k+1, grid ); /* Local block number. */ lsub1 = Lrowind_bc_ptr[lk]; if ( lsub1 ) { msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0]*LB_DESCRIPTOR; msgcnt[1] = lsub1[1] * SuperSize( k+1 ); } else { msgcnt[0] = 0; msgcnt[1] = 0; } scp = &grid->rscp; /* The scope of process row. 
*/ for (pj = 0; pj < Pc; ++pj) { if ( ToSendR[lk][pj] != EMPTY ) { lusup1 = Lnzval_bc_ptr[lk]; #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(1); #endif MPI_Isend( lsub1, msgcnt[0], mpi_int_t, pj, (4*(k+1))%NTAGS, scp->comm, &send_req[pj] ); MPI_Isend( lusup1, msgcnt[1], MPI_DOUBLE, pj, (4*(k+1)+1)%NTAGS, scp->comm, &send_req[pj+Pc] ); #if ( VAMPIR>=1 ) VT_end(1); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[0]*iword + msgcnt[1]*dword; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, k+1, msgcnt[0], msgcnt[1], pj); #endif } } /* for pj ... */ } else { /* Post Recv of block column L(:,k+1). */ if ( ToRecv[k+1] >= 1 ) { scp = &grid->rscp; /* The scope of process row. */ MPI_Irecv(Lsub_buf_2[(k+1)%2], Llu->bufmax[0], mpi_int_t, kcol, (4*(k+1))%NTAGS, scp->comm, &recv_req[0]); MPI_Irecv(Lval_buf_2[(k+1)%2], Llu->bufmax[1], MPI_DOUBLE, kcol, (4*(k+1)+1)%NTAGS, scp->comm, &recv_req[1]); #if ( DEBUGlevel>=2 ) printf("(%d) Post Irecv L(:,%4d)\n", iam, k+1); #endif } } /* if mycol == Pc(k+1) */ } /* if k+1 < nsupers */ if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */ /* --------------------------------------------------- Update all other blocks using block row U(k,:) --------------------------------------------------- */ for (j = 0; j < nub; ++j) { lptr = lptr0; luptr = luptr0; jb = usub[iukp]; /* Global block number of block U(k,j). */ ljb = LBj( jb, grid ); /* Local block number of U(k,j). */ nsupc = SuperSize( jb ); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ /* Prepare to call DGEMM. 
*/ jj = iukp; while ( usub[jj] == klst ) ++jj; ldu = klst - usub[jj++]; ncols = 1; full = 1; for (; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { ++ncols; if ( segsize != ldu ) full = 0; if ( segsize > ldu ) ldu = segsize; } } #if ( DEBUGlevel>=3 ) printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n", iam, full, k, jb, ldu, ncols, nsupc); ++num_update; #endif if ( full ) { tempu = &uval[rukp]; } else { /* Copy block U(k,j) into tempU2d. */ #if ( DEBUGlevel>=3 ) ++num_copy; #endif tempu = tempU2d; for (jj = iukp; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { lead_zero = ldu - segsize; for (i = 0; i < lead_zero; ++i) tempu[i] = 0.0; tempu += lead_zero; for (i = 0; i < segsize; ++i) tempu[i] = uval[rukp+i]; rukp += segsize; tempu += segsize; } } tempu = tempU2d; rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */ } /* if full ... */ for (lb = 0; lb < nlb; ++lb) { ib = lsub[lptr]; /* Row block L(i,k). */ nbrow = lsub[lptr+1]; /* Number of full rows. */ lptr += LB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; #ifdef _CRAY SGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #elif defined (USE_VENDOR_BLAS) dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt, 1, 1); #else dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #endif stat->ops[FACT] += 2 * nbrow * ldu * ncols; /* Now gather the result into the destination block. */ if ( ib < jb ) { /* A(i,j) is in U. */ ilst = FstBlockC( ib+1 ); lib = LBi( ib, grid ); index = Ufstnz_br_ptr[lib]; ijb = index[iuip[lib]]; while ( ijb < jb ) { /* Search for dest block. */ ruip[lib] += index[iuip[lib]+1]; iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb ); ijb = index[iuip[lib]]; } /* Skip descriptor. Now point to fstnz index of block U(i,j). 
*/ iuip[lib] += UB_DESCRIPTOR; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0 ; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; ucol[rel] -= tempv[i]; } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. */ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! */ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted for the L blocks. */ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) { /*#pragma _CRI cache_bypass nzval,tempv*/ for (i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; nzval[indirect[rel]] -= tempv[i]; } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* for j ... */ } /* if k L(:,k) and U(k,:) are not empty */ } /* ------------------------------------------ END MAIN LOOP: for k = ... 
------------------------------------------ */ #if ( VAMPIR>=1 ) VT_end(100); VT_traceoff(); #endif if ( Pr*Pc > 1 ) { SUPERLU_FREE(Lsub_buf_2[0]); /* also free Lsub_buf_2[1] */ SUPERLU_FREE(Lval_buf_2[0]); /* also free Lval_buf_2[1] */ if ( Llu->bufmax[2] != 0 ) SUPERLU_FREE(Usub_buf); if ( Llu->bufmax[3] != 0 ) SUPERLU_FREE(Uval_buf); SUPERLU_FREE(send_req); if ( U_diag_blk_send_req[myrow] ) { /* wait for last Isend requests to complete, deallocate objects */ for (krow = 0; krow < Pr; ++krow) if ( krow != myrow ) MPI_Wait(U_diag_blk_send_req + krow, &status); } SUPERLU_FREE(U_diag_blk_send_req); } SUPERLU_FREE(Llu->ujrow); SUPERLU_FREE(tempv2d); SUPERLU_FREE(indirect); SUPERLU_FREE(iuip); SUPERLU_FREE(ruip); /* Prepare error message. */ if ( *info == 0 ) *info = n + 1; #if ( PROFlevel>=1 ) TIC(t1); #endif MPI_Allreduce( info, &iinfo, 1, mpi_int_t, MPI_MIN, grid->comm ); #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; { float msg_vol_max, msg_vol_sum, msg_cnt_max, msg_cnt_sum; MPI_Reduce( &msg_cnt, &msg_cnt_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_cnt, &msg_cnt_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); if ( !iam ) { printf("\tPDGSTRF comm stat:" "\tAvg\tMax\t\tAvg\tMax\n" "\t\t\tCount:\t%.0f\t%.0f\tVol(MB)\t%.2f\t%.2f\n", msg_cnt_sum/Pr/Pc, msg_cnt_max, msg_vol_sum/Pr/Pc*1e-6, msg_vol_max*1e-6); } } #endif if ( iinfo == n + 1 ) *info = 0; else *info = iinfo; #if ( PRNTlevel==3 ) MPI_Allreduce( &zero_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. # msg of zero size\t%d\n", iinfo); MPI_Allreduce( &total_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. 
# total msg\t%d\n", iinfo); #endif #if ( DEBUGlevel>=2 ) for (i = 0; i < Pr * Pc; ++i) { if ( iam == i ) { dPrintLblocks(iam, nsupers, grid, Glu_persist, Llu); dPrintUblocks(iam, nsupers, grid, Glu_persist, Llu); printf("(%d)\n", iam); PrintInt10("Recv", nsupers, Llu->ToRecv); } MPI_Barrier( grid->comm ); } #endif #if ( DEBUGlevel>=3 ) printf("(%d) num_copy=%d, num_update=%d\n", iam, num_copy, num_update); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrf()"); #endif } /* PDGSTRF */ /************************************************************************/ static void pdgstrf2 /************************************************************************/ ( superlu_options_t *options, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, MPI_Request *U_diag_blk_send_req, SuperLUStat_t *stat, int* info ) /* * Purpose * ======= * * Panel factorization -- block column k * Factor diagonal and subdiagonal blocks and test for exact singularity. * Only the column processes that owns block column *k* participate * in the work. * * Arguments * ========= * * k (input) int (global) * The column number of the block column to be factorized. * * thresh (input) double (global) * The threshold value = s_eps * anorm. * * Glu_persist (input) Glu_persist_t* * Global data structures (xsup, supno) replicated on all processes. * * grid (input) gridinfo_t* * The 2D process mesh. * * Llu (input/output) LocalLU_t* * Local data structures to store distributed L and U matrices. * * U_diag_blk_send_req (input/output) MPI_Request* * List of send requests to send down the diagonal block of U. * * stat (output) SuperLUStat_t* * Record the statistics about the factorization. * See SuperLUStat_t structure defined in util.h. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, U(i,i) is exactly zero. 
The factorization has * been completed, but the factor U is exactly singular, * and division by zero will occur if it is used to solve a * system of equations. * */ { int cols_left, iam, l, pkk, pr; int incx = 1, incy = 1; int nsupr; /* number of rows in the block (LDA) */ int luptr; int_t i, krow, j, jfst, jlst, u_diag_cnt; int_t nsupc; /* number of columns in the block */ int_t *xsup = Glu_persist->xsup; int_t Pr; MPI_Status status; MPI_Comm comm = (grid->cscp).comm; double *lusup, temp; double *ujrow, *ublk_ptr; /* pointer to the U block */ double alpha = -1; *info = 0; /* Quick return. */ /* Initialization. */ iam = grid->iam; Pr = grid->nprow; krow = PROW( k, grid ); pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); j = LBj( k, grid ); /* Local block number */ jfst = FstBlockC( k ); jlst = FstBlockC( k+1 ); lusup = Llu->Lnzval_bc_ptr[j]; nsupc = SuperSize( k ); if ( Llu->Lrowind_bc_ptr[j] ) nsupr = Llu->Lrowind_bc_ptr[j][1]; ublk_ptr = ujrow = Llu->ujrow; luptr = 0; /* point to the diagonal entries. */ cols_left = nsupc; /* supernode size */ u_diag_cnt = 0; if ( iam == pkk ) { /* diagonal process */ if ( U_diag_blk_send_req && U_diag_blk_send_req[krow] ) { /* There are pending sends - wait for all Isend to complete */ for (pr = 0; pr < Pr; ++pr) if (pr != krow) MPI_Wait(U_diag_blk_send_req + pr, &status); } for (j = 0; j < jlst - jfst; ++j) { /* for each column in panel */ /* Diagonal pivot */ i = luptr; if ( options->ReplaceTinyPivot == YES || lusup[i] == 0.0 ) { if ( fabs(lusup[i]) < thresh ) { #if ( PRNTlevel>=2 ) printf("(%d) .. col %d, tiny pivot %e ", iam, jfst+j, lusup[i]); #endif /* Keep the new diagonal entry with the same sign. */ if ( lusup[i] < 0 ) lusup[i] = -thresh; else lusup[i] = thresh; #if ( PRNTlevel>=2 ) printf("replaced by %e\n", lusup[i]); #endif ++(stat->TinyPivots); } } for (l = 0; l < cols_left; ++l, i += nsupr, ++u_diag_cnt) ublk_ptr[u_diag_cnt] = lusup[i]; /* copy one row of U */ if ( ujrow[0] == 0.0 ) { /* Test for singularity. 
*/ *info = j+jfst+1; } else { /* Scale the j-th column. */ temp = 1.0 / ujrow[0]; for (i = luptr+1; i < luptr-j+nsupr; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupr-j-1; } /* Rank-1 update of the trailing submatrix. */ if ( --cols_left ) { l = nsupr - j - 1; #ifdef _CRAY SGER(&l, &cols_left, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr); #else dger_(&l, &cols_left, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr); #endif stat->ops[FACT] += 2 * l * cols_left; } ujrow = ublk_ptr + u_diag_cnt; /* move to next row of U */ luptr += nsupr + 1; /* move to next column */ } /* for column j ... */ if ( U_diag_blk_send_req && iam == pkk ) { /* Send the U block */ /** ALWAYS SEND TO ALL OTHERS - TO FIX **/ for (pr = 0; pr < Pr; ++pr) if (pr != krow) MPI_Isend(ublk_ptr, u_diag_cnt, MPI_DOUBLE, pr, ((k<<2)+2)%NTAGS, comm, U_diag_blk_send_req + pr); U_diag_blk_send_req[krow] = 1; /* flag outstanding Isend */ } } else { /* non-diagonal process */ /* Receive the diagonal block of U */ MPI_Recv(ublk_ptr, (nsupc*(nsupc+1))>>1, MPI_DOUBLE, krow, ((k<<2)+2)%NTAGS, comm, &status); for (j = 0; j < jlst - jfst; ++j) { /* for each column in panel */ u_diag_cnt += cols_left; if ( !lusup ) { /* empty block column */ --cols_left; if ( ujrow[0] == 0.0 ) *info = j+jfst+1; continue; } /* Test for singularity. */ if ( ujrow[0] == 0.0 ) { *info = j+jfst+1; } else { /* Scale the j-th column. */ temp = 1.0 / ujrow[0]; for (i = luptr; i < luptr+nsupr; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupr; } /* Rank-1 update of the trailing submatrix. 
*/ if ( --cols_left ) { #ifdef _CRAY SGER(&nsupr, &cols_left, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr); #else dger_(&nsupr, &cols_left, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr); #endif stat->ops[FACT] += 2 * nsupr * cols_left; } ujrow = ublk_ptr + u_diag_cnt; /* move to next row of U */ luptr += nsupr; /* move to next column */ } /* for column j ... */ } /* end if pkk ... */ } /* PDGSTRF2 */ /************************************************************************/ static void pdgstrs2 /************************************************************************/ #ifdef _CRAY ( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3 ) #else ( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat ) #endif /* * Purpose * ======= * Perform parallel triangular solves * U(k,:) := A(k,:) \ L(k,k). * Only the process row that owns block row *k* participates * in the work. * * Arguments * ========= * * m (input) int (global) * Number of rows in the matrix. * * k (input) int (global) * The row number of the block row to be factorized. * * Glu_persist (input) Glu_persist_t* * Global data structures (xsup, supno) replicated on all processes. * * grid (input) gridinfo_t* * The 2D process mesh. * * Llu (input/output) LocalLU_t* * Local data structures to store distributed L and U matrices. * * stat (output) SuperLUStat_t* * Record the statistics about the factorization; * See SuperLUStat_t structure defined in util.h. * */ { int iam, pkk; int incx = 1; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int segsize; int_t nsupc; /* number of columns in the block */ int_t luptr, iukp, rukp; int_t b, gb, j, klst, knsupc, lk, nb; int_t *xsup = Glu_persist->xsup; int_t *usub; double *lusup, *uval; /* Quick return. 
*/ lk = LBi( k, grid ); /* Local block number */ if ( !Llu->Unzval_br_ptr[lk] ) return; /* Initialization. */ iam = grid->iam; pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); klst = FstBlockC( k+1 ); knsupc = SuperSize( k ); usub = Llu->Ufstnz_br_ptr[lk]; /* index[] of block row U(k,:) */ uval = Llu->Unzval_br_ptr[lk]; nb = usub[0]; iukp = BR_HEADER; rukp = 0; if ( iam == pkk ) { lk = LBj( k, grid ); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; } else { nsupr = Llu->Lsub_buf_2[k%2][1]; /* LDA of lusup[] */ lusup = Llu->Lval_buf_2[k%2]; } /* Loop through all the row blocks. */ for (b = 0; b < nb; ++b) { gb = usub[iukp]; nsupc = SuperSize( gb ); iukp += UB_DESCRIPTOR; /* Loop through all the segments in the block. */ for (j = 0; j < nsupc; ++j) { segsize = klst - usub[iukp++]; if ( segsize ) { /* Nonzero segment. */ luptr = (knsupc - segsize) * (nsupr + 1); #ifdef _CRAY STRSV(ftcs1, ftcs2, ftcs3, &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #elif defined (USE_VENDOR_BLAS) dtrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx, 1, 1, 1); #else dtrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #endif stat->ops[FACT] += segsize * (segsize + 1); rukp += segsize; } } } /* for b ... */ } /* PDGSTRS2 */ static int probe_recv(int iam, int source, int tag, MPI_Datatype datatype, MPI_Comm comm, int buf_size) { MPI_Status status; int count; MPI_Probe( source, tag, comm, &status ); MPI_Get_count( &status, datatype, &count ); if ( count > buf_size ) { printf("(%d) Recv'ed count %d > buffer size $d\n", iam, count, buf_size); exit(-1); } return 0; } SuperLU_DIST_5.3.0/SRC/zsp_blas3_dist.c0000644013363400111340000001106713233431301016357 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Sparse BLAS3, using some dense BLAS3 operations * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ /* * File name: sp_blas3.c * Purpose: Sparse BLAS3, using some dense BLAS3 operations. */ #include "superlu_zdefs.h" /*! \brief
  Purpose   
    =======   

    sp_zgemm_dist performs one of the matrix-matrix operations

       C := alpha*op( A )*op( B ) + beta*C,   

    where  op( X ) is one of 

       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),

    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
  

    Parameters   
    ==========   

    TRANSA - (input) char*
             On entry, TRANSA specifies the form of op( A ) to be used in 
             the matrix multiplication as follows:   
                TRANSA = 'N' or 'n',  op( A ) = A.   
                TRANSA = 'T' or 't',  op( A ) = A'.   
                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
             Unchanged on exit.   

    TRANSB - (input) char*
             On entry, TRANSB specifies the form of op( B ) to be used in 
             the matrix multiplication as follows:   
                TRANSB = 'N' or 'n',  op( B ) = B.   
                TRANSB = 'T' or 't',  op( B ) = B'.   
                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
             Unchanged on exit.   

    M      - (input) int   
             On entry,  M  specifies  the number of rows of the matrix 
	     op( A ) and of the matrix C.  M must be at least zero. 
	     Unchanged on exit.   

    N      - (input) int
             On entry,  N specifies the number of columns of the matrix 
	     op( B ) and the number of columns of the matrix C. N must be 
	     at least zero.
	     Unchanged on exit.   

    K      - (input) int
             On entry, K specifies the number of columns of the matrix 
	     op( A ) and the number of rows of the matrix op( B ). K must 
	     be at least  zero.   
             Unchanged on exit.
	     
    ALPHA  - (input) doublecomplex
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
             Currently, the type of A can be:
                 Stype = NC or NCP; Dtype = Z; Mtype = GE. 
             In the future, more general A can be handled.

    B      - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is 
             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
             part of the array B must contain the matrix B, otherwise 
             the leading n by k part of the array B must contain the 
             matrix B.   
             Unchanged on exit.   

    LDB    - (input) int
             On entry, LDB specifies the first dimension of B as declared 
             in the calling (sub) program. LDB must be at least max( 1, n ).  
             Unchanged on exit.   

    BETA   - (input) doublecomplex
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then C need not be set on input.   

    C      - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDC, n ).   
             Before entry, the leading m by n part of the array C must 
             contain the matrix C,  except when beta is zero, in which 
             case C need not be set on entry.   
             On exit, the array C is overwritten by the m by n matrix 
	     ( alpha*op( A )*B + beta*C ).   

    LDC    - (input) int
             On entry, LDC specifies the first dimension of C as declared 
             in the calling (sub)program. LDC must be at least max(1,m).   
             Unchanged on exit.   

    ==== Sparse Level 3 Blas routine.  
*/
int
sp_zgemm_dist(char *transa, int n, doublecomplex alpha, SuperMatrix *A,
              doublecomplex *b, int ldb, doublecomplex beta,
              doublecomplex *c, int ldc)
{
    /* Unit increments are passed to sp_zgemv_dist for both vectors. */
    int stride_b = 1;
    int stride_c = 1;
    int col = 0;

    /* Hand each of the n columns of b and c to the sparse
       matrix-vector routine in turn. */
    while (col < n) {
        sp_zgemv_dist(transa, alpha, A, &b[ldb * col], stride_b,
                      beta, &c[ldc * col], stride_c);
        ++col;
    }

    return 0;
}
SuperLU_DIST_5.3.0/SRC/colamd.c0000644013363400111340000030562513233431301014701 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file colamd.c *\brief A sparse matrix column ordering algorithm
    ========================================================================== 
    === colamd/symamd - a sparse matrix column ordering algorithm ============ 
    ========================================================================== 


    colamd:  an approximate minimum degree column ordering algorithm,
    	for LU factorization of symmetric or unsymmetric matrices,
	QR factorization, least squares, interior point methods for
	linear programming problems, and other related problems.

    symamd:  an approximate minimum degree ordering algorithm for Cholesky
    	factorization of symmetric matrices.

    Purpose:

	Colamd computes a permutation Q such that the Cholesky factorization of
	(AQ)'(AQ) has less fill-in and requires fewer floating point operations
	than A'A.  This also provides a good ordering for sparse partial
	pivoting methods, P(AQ) = LU, where Q is computed prior to numerical
	factorization, and P is computed during numerical factorization via
	conventional partial pivoting with row interchanges.  Colamd is the
	column ordering method used in SuperLU, part of the ScaLAPACK library.
	It is also available as built-in function in MATLAB Version 6,
	available from MathWorks, Inc. (http://www.mathworks.com).  This
	routine can be used in place of colmmd in MATLAB.

    	Symamd computes a permutation P of a symmetric matrix A such that the
	Cholesky factorization of PAP' has less fill-in and requires fewer
	floating point operations than A.  Symamd constructs a matrix M such
	that M'M has the same nonzero pattern of A, and then orders the columns
	of M using colamd.  The column ordering of M is then returned as the
	row and column ordering P of A. 

    Authors:

	The authors of the code itself are Stefan I. Larimore and Timothy A.
	Davis (davis@cise.ufl.edu), University of Florida.  The algorithm was
	developed in collaboration with John Gilbert, Xerox PARC, and Esmond
	Ng, Oak Ridge National Laboratory.

    Date:

	September 8, 2003.  Version 2.3.

    Acknowledgements:

	This work was supported by the National Science Foundation, under
	grants DMS-9504974 and DMS-9803599.

    Copyright and License:

	Copyright (c) 1998-2003 by the University of Florida.
	All Rights Reserved.

	THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
	EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.

	Permission is hereby granted to use, copy, modify, and/or distribute
	this program, provided that the Copyright, this License, and the
	Availability of the original version is retained on all copies and made
	accessible to the end-user of any code or package that includes COLAMD
	or any modified version of COLAMD. 

    Availability:

	The colamd/symamd library is available at

	    http://www.cise.ufl.edu/research/sparse/colamd/

	This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.c
	file.  It requires the colamd.h file.  It is required by the colamdmex.c
	and symamdmex.c files, for the MATLAB interface to colamd and symamd.

    See the ChangeLog file for changes since Version 1.0.

    ========================================================================== 
    === Description of user-callable routines ================================ 
    ========================================================================== 


    ----------------------------------------------------------------------------
    colamd_recommended:
    ----------------------------------------------------------------------------

	C syntax:

	    #include "colamd.h"
	    int colamd_recommended (int nnz, int n_row, int n_col) ;

	    or as a C macro

	    #include "colamd.h"
	    Alen = COLAMD_RECOMMENDED (int nnz, int n_row, int n_col) ;

	Purpose:

	    Returns recommended value of Alen for use by colamd.  Returns -1
	    if any input argument is negative.  The use of this routine
	    or macro is optional.  Note that the macro uses its arguments
	    more than once, so be careful for side effects, if you pass
	    expressions as arguments to COLAMD_RECOMMENDED.  Not needed for
	    symamd, which dynamically allocates its own memory.

	Arguments (all input arguments):

	    int nnz ;		Number of nonzeros in the matrix A.  This must
				be the same value as p [n_col] in the call to
				colamd - otherwise you will get a wrong value
				of the recommended memory to use.

	    int n_row ;		Number of rows in the matrix A.

	    int n_col ;		Number of columns in the matrix A.

    ----------------------------------------------------------------------------
    colamd_set_defaults:
    ----------------------------------------------------------------------------

	C syntax:

	    #include "colamd.h"
	    colamd_set_defaults (double knobs [COLAMD_KNOBS]) ;

	Purpose:

	    Sets the default parameters.  The use of this routine is optional.

	Arguments:

	    double knobs [COLAMD_KNOBS] ;	Output only.

		Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col)
		entries are removed prior to ordering.  Columns with more than
		(knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to
		ordering, and placed last in the output column ordering. 

		Symamd: uses only knobs [COLAMD_DENSE_ROW], which is knobs [0].
		Rows and columns with more than (knobs [COLAMD_DENSE_ROW] * n)
		entries are removed prior to ordering, and placed last in the
		output ordering.

		COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1,
		respectively, in colamd.h.  Default values of these two knobs
		are both 0.5.  Currently, only knobs [0] and knobs [1] are
		used, but future versions may use more knobs.  If so, they will
		be properly set to their defaults by the future version of
		colamd_set_defaults, so that the code that calls colamd will
		not need to change, assuming that you either use
		colamd_set_defaults, or pass a (double *) NULL pointer as the
		knobs array to colamd or symamd.

    ----------------------------------------------------------------------------
    colamd:
    ----------------------------------------------------------------------------

	C syntax:

	    #include "colamd.h"
	    int colamd (int n_row, int n_col, int Alen, int *A, int *p,
	    	double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS]) ;

	Purpose:

	    Computes a column ordering (Q) of A such that P(AQ)=LU or
	    (AQ)'AQ=LL' have less fill-in and require fewer floating point
	    operations than factorizing the unpermuted matrix A or A'A,
	    respectively.
	    
	Returns:

	    TRUE (1) if successful, FALSE (0) otherwise.

	Arguments:

	    int n_row ;		Input argument.

		Number of rows in the matrix A.
		Restriction:  n_row >= 0.
		Colamd returns FALSE if n_row is negative.

	    int n_col ;		Input argument.

		Number of columns in the matrix A.
		Restriction:  n_col >= 0.
		Colamd returns FALSE if n_col is negative.

	    int Alen ;		Input argument.

		Restriction (see note):
		Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col
		Colamd returns FALSE if these conditions are not met.

		Note:  this restriction makes a modest assumption regarding
		the size of the two typedef's structures in colamd.h.
		We do, however, guarantee that

			Alen >= colamd_recommended (nnz, n_row, n_col)
		
		or equivalently as a C preprocessor macro: 

			Alen >= COLAMD_RECOMMENDED (nnz, n_row, n_col)

		will be sufficient.

	    int A [Alen] ;	Input argument, undefined on output.

		A is an integer array of size Alen.  Alen must be at least as
		large as the bare minimum value given above, but this is very
		low, and can result in excessive run time.  For best
		performance, we recommend that Alen be greater than or equal to
		colamd_recommended (nnz, n_row, n_col), which adds
		nnz/5 to the bare minimum value given above.

		On input, the row indices of the entries in column c of the
		matrix are held in A [(p [c]) ... (p [c+1]-1)].  The row indices
		in a given column c need not be in ascending order, and
		duplicate row indices may be present.  However, colamd will
		work a little faster if both of these conditions are met
		(Colamd puts the matrix into this format, if it finds that the
		conditions are not met).

		The matrix is 0-based.  That is, rows are in the range 0 to
		n_row-1, and columns are in the range 0 to n_col-1.  Colamd
		returns FALSE if any row index is out of range.

		The contents of A are modified during ordering, and are
		undefined on output.

	    int p [n_col+1] ;	Both input and output argument.

		p is an integer array of size n_col+1.  On input, it holds the
		"pointers" for the column form of the matrix A.  Column c of
		the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
		entry, p [0], must be zero, and p [c] <= p [c+1] must hold
		for all c in the range 0 to n_col-1.  The value p [n_col] is
		thus the total number of entries in the pattern of the matrix A.
		Colamd returns FALSE if these conditions are not met.

		On output, if colamd returns TRUE, the array p holds the column
		permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is
		the first column index in the new ordering, and p [n_col-1] is
		the last.  That is, p [k] = j means that column j of A is the
		kth pivot column, in AQ, where k is in the range 0 to n_col-1
		(p [0] = j means that column j of A is the first column in AQ).

		If colamd returns FALSE, then no permutation is returned, and
		p is undefined on output.

	    double knobs [COLAMD_KNOBS] ;	Input argument.

		See colamd_set_defaults for a description.

	    int stats [COLAMD_STATS] ;		Output argument.

		Statistics on the ordering, and error status.
		See colamd.h for related definitions.
		Colamd returns FALSE if stats is not present.

		stats [0]:  number of dense or empty rows ignored.

		stats [1]:  number of dense or empty columns ignored (and
				ordered last in the output permutation p)
				Note that a row can become "empty" if it
				contains only "dense" and/or "empty" columns,
				and similarly a column can become "empty" if it
				only contains "dense" and/or "empty" rows.

		stats [2]:  number of garbage collections performed.
				This can be excessively high if Alen is close
				to the minimum required value.

		stats [3]:  status code.  < 0 is an error code.
			    > 0 is a warning or notice.

			0	OK.  Each column of the input matrix contained
				row indices in increasing order, with no
				duplicates.

			1	OK, but columns of input matrix were jumbled
				(unsorted columns or duplicate entries).  Colamd
				had to do some extra work to sort the matrix
				first and remove duplicate entries, but it
				still was able to return a valid permutation
				(return value of colamd was TRUE).

					stats [4]: highest numbered column that
						is unsorted or has duplicate
						entries.
					stats [5]: last seen duplicate or
						unsorted row index.
					stats [6]: number of duplicate or
						unsorted row indices.

			-1	A is a null pointer

			-2	p is a null pointer

			-3 	n_row is negative

					stats [4]: n_row

			-4	n_col is negative

					stats [4]: n_col

			-5	number of nonzeros in matrix is negative

					stats [4]: number of nonzeros, p [n_col]

			-6	p [0] is nonzero

					stats [4]: p [0]

			-7	A is too small

					stats [4]: required size
					stats [5]: actual size (Alen)

			-8	a column has a negative number of entries

					stats [4]: column with < 0 entries
					stats [5]: number of entries in col

			-9	a row index is out of bounds

					stats [4]: column with bad row index
					stats [5]: bad row index
					stats [6]: n_row, # of rows of matrix

			-10	(unused; see symamd.c)

			-999	(unused; see symamd.c)

		Future versions may return more statistics in the stats array.

	Example:
	
	    See http://www.cise.ufl.edu/research/sparse/colamd/example.c
	    for a complete example.

	    To order the columns of a 5-by-4 matrix with 11 nonzero entries in
	    the following nonzero pattern

	    	x 0 x 0
		x 0 x x
		0 x x 0
		0 0 x x
		x x 0 0

	    with default knobs and no output statistics, do the following:

		#include "colamd.h"
		#define ALEN COLAMD_RECOMMENDED (11, 5, 4)
		int A [ALEN] = {1, 2, 5, 3, 5, 1, 2, 3, 4, 2, 4} ;
		int p [ ] = {0, 3, 5, 9, 11} ;
		int stats [COLAMD_STATS] ;
		colamd (5, 4, ALEN, A, p, (double *) NULL, stats) ;

	    The permutation is returned in the array p, and A is destroyed.

    ----------------------------------------------------------------------------
    symamd:
    ----------------------------------------------------------------------------

	C syntax:

	    #include "colamd.h"
	    int symamd (int n, int *A, int *p, int *perm,
	    	double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS],
		void * (*allocate) (size_t, size_t), void (*release) (void *)) ;

	Purpose:

    	    The symamd routine computes an ordering P of a symmetric sparse
	    matrix A such that the Cholesky factorization PAP' = LL' remains
	    sparse.  It is based on a column ordering of a matrix M constructed
	    so that the nonzero pattern of M'M is the same as A.  The matrix A
	    is assumed to be symmetric; only the strictly lower triangular part
	    is accessed.  You must pass your selected memory allocator (usually
	    calloc/free or mxCalloc/mxFree) to symamd, for it to allocate
	    memory for the temporary matrix M.

	Returns:

	    TRUE (1) if successful, FALSE (0) otherwise.

	Arguments:

	    int n ;		Input argument.

	    	Number of rows and columns in the symmetric matrix A.
		Restriction:  n >= 0.
		Symamd returns FALSE if n is negative.

	    int A [nnz] ;	Input argument.

	    	A is an integer array of size nnz, where nnz = p [n].
		
		The row indices of the entries in column c of the matrix are
		held in A [(p [c]) ... (p [c+1]-1)].  The row indices in a
		given column c need not be in ascending order, and duplicate
		row indices may be present.  However, symamd will run faster
		if the columns are in sorted order with no duplicate entries. 

		The matrix is 0-based.  That is, rows are in the range 0 to
		n-1, and columns are in the range 0 to n-1.  Symamd
		returns FALSE if any row index is out of range.

		The contents of A are not modified.

	    int p [n+1] ;   	Input argument.

		p is an integer array of size n+1.  On input, it holds the
		"pointers" for the column form of the matrix A.  Column c of
		the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
		entry, p [0], must be zero, and p [c] <= p [c+1] must hold
		for all c in the range 0 to n-1.  The value p [n] is
		thus the total number of entries in the pattern of the matrix A.
		Symamd returns FALSE if these conditions are not met.

		The contents of p are not modified.

	    int perm [n+1] ;   	Output argument.

		On output, if symamd returns TRUE, the array perm holds the
		permutation P, where perm [0] is the first index in the new
		ordering, and perm [n-1] is the last.  That is, perm [k] = j
		means that row and column j of A is the kth column in PAP',
		where k is in the range 0 to n-1 (perm [0] = j means
		that row and column j of A are the first row and column in
		PAP').  The array is used as a workspace during the ordering,
		which is why it must be of length n+1, not just n.

	    double knobs [COLAMD_KNOBS] ;	Input argument.

		See colamd_set_defaults for a description.

	    int stats [COLAMD_STATS] ;		Output argument.

		Statistics on the ordering, and error status.
		See colamd.h for related definitions.
		Symamd returns FALSE if stats is not present.

		stats [0]:  number of dense or empty row and columns ignored
				(and ordered last in the output permutation 
				perm).  Note that a row/column can become
				"empty" if it contains only "dense" and/or
				"empty" columns/rows.

		stats [1]:  (same as stats [0])

		stats [2]:  number of garbage collections performed.

		stats [3]:  status code.  < 0 is an error code.
			    > 0 is a warning or notice.

			0	OK.  Each column of the input matrix contained
				row indices in increasing order, with no
				duplicates.

			1	OK, but columns of input matrix were jumbled
				(unsorted columns or duplicate entries).  Symamd
				had to do some extra work to sort the matrix
				first and remove duplicate entries, but it
				still was able to return a valid permutation
				(return value of symamd was TRUE).

					stats [4]: highest numbered column that
						is unsorted or has duplicate
						entries.
					stats [5]: last seen duplicate or
						unsorted row index.
					stats [6]: number of duplicate or
						unsorted row indices.

			-1	A is a null pointer

			-2	p is a null pointer

			-3	(unused, see colamd.c)

			-4 	n is negative

					stats [4]: n

			-5	number of nonzeros in matrix is negative

					stats [4]: # of nonzeros (p [n]).

			-6	p [0] is nonzero

					stats [4]: p [0]

			-7	(unused)

			-8	a column has a negative number of entries

					stats [4]: column with < 0 entries
					stats [5]: number of entries in col

			-9	a row index is out of bounds

					stats [4]: column with bad row index
					stats [5]: bad row index
					stats [6]: n_row, # of rows of matrix

			-10	out of memory (unable to allocate temporary
				workspace for M or count arrays using the
				"allocate" routine passed into symamd).

			-999	internal error.  colamd failed to order the
				matrix M, when it should have succeeded.  This
				indicates a bug.  If this (and *only* this)
				error code occurs, please contact the authors.
				Don't contact the authors if you get any other
				error code.

		Future versions may return more statistics in the stats array.

	    void * (*allocate) (size_t, size_t)

	    	A pointer to a function providing memory allocation.  The
		allocated memory must be returned initialized to zero.  For a
		C application, this argument should normally be a pointer to
		calloc.  For a MATLAB mexFunction, the routine mxCalloc is
		passed instead.

	    void (*release) (void *)

	    	A pointer to a function that frees memory allocated by the
		memory allocation routine above.  For a C application, this
		argument should normally be a pointer to free.  For a MATLAB
		mexFunction, the routine mxFree is passed instead.


    ----------------------------------------------------------------------------
    colamd_report:
    ----------------------------------------------------------------------------

	C syntax:

	    #include "colamd.h"
	    colamd_report (int stats [COLAMD_STATS]) ;

	Purpose:

	    Prints the error status and statistics recorded in the stats
	    array on the standard error output (for a standard C routine)
	    or on the MATLAB output (for a mexFunction).

	Arguments:

	    int stats [COLAMD_STATS] ;	Input only.  Statistics from colamd.


    ----------------------------------------------------------------------------
    symamd_report:
    ----------------------------------------------------------------------------

	C syntax:

	    #include "colamd.h"
	    symamd_report (int stats [COLAMD_STATS]) ;

	Purpose:

	    Prints the error status and statistics recorded in the stats
	    array on the standard error output (for a standard C routine)
	    or on the MATLAB output (for a mexFunction).

	Arguments:

	    int stats [COLAMD_STATS] ;	Input only.  Statistics from symamd.

 
*/ /* ========================================================================== */ /* === Scaffolding code definitions ======================================== */ /* ========================================================================== */ /* Ensure that debugging is turned off: */ #ifndef NDEBUG #define NDEBUG #endif /* NDEBUG */ /* Our "scaffolding code" philosophy: In our opinion, well-written library code should keep its "debugging" code, and just normally have it turned off by the compiler so as not to interfere with performance. This serves several purposes: (1) assertions act as comments to the reader, telling you what the code expects at that point. All assertions will always be true (unless there really is a bug, of course). (2) leaving in the scaffolding code assists anyone who would like to modify the code, or understand the algorithm (by reading the debugging output, one can get a glimpse into what the code is doing). (3) (gasp!) for actually finding bugs. This code has been heavily tested and "should" be fully functional and bug-free ... but you never know... To enable debugging, comment out the "#define NDEBUG" above. For a MATLAB mexFunction, you will also need to modify mexopts.sh to remove the -DNDEBUG definition. The code will become outrageously slow when debugging is enabled. To control the level of debugging output, set an environment variable D to 0 (little), 1 (some), 2, 3, or 4 (lots). When debugging, you should see the following message on the standard output: colamd: debug version, D = 1 (THIS WILL BE SLOW!) or a similar message for symamd. If you don't, then debugging has not been enabled. 
*/ /* ========================================================================== */ /* === Include files ======================================================== */ /* ========================================================================== */ #include "colamd.h" #include #ifdef MATLAB_MEX_FILE #include "mex.h" #include "matrix.h" #else #include #include #endif /* MATLAB_MEX_FILE */ /* ========================================================================== */ /* === Definitions ========================================================== */ /* ========================================================================== */ /* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */ #define PUBLIC #define PRIVATE static #define MAX(a,b) (((a) > (b)) ? (a) : (b)) #define MIN(a,b) (((a) < (b)) ? (a) : (b)) #define ONES_COMPLEMENT(r) (-(r)-1) /* -------------------------------------------------------------------------- */ /* Change for version 2.1: define TRUE and FALSE only if not yet defined */ /* -------------------------------------------------------------------------- */ #ifndef TRUE #define TRUE (1) #endif #ifndef FALSE #define FALSE (0) #endif /* -------------------------------------------------------------------------- */ #define EMPTY (-1) /* Row and column status */ #define ALIVE (0) #define DEAD (-1) /* Column status */ #define DEAD_PRINCIPAL (-1) #define DEAD_NON_PRINCIPAL (-2) /* Macros for row and column status update and checking. 
*/ #define ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark) #define ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE) #define ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE) #define COL_IS_DEAD(c) (Col [c].start < ALIVE) #define COL_IS_ALIVE(c) (Col [c].start >= ALIVE) #define COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL) #define KILL_ROW(r) { Row [r].shared2.mark = DEAD ; } #define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; } #define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; } /* ========================================================================== */ /* === Colamd reporting mechanism =========================================== */ /* ========================================================================== */ #ifdef MATLAB_MEX_FILE /* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output */ #define PRINTF mexPrintf /* In MATLAB, matrices are 1-based to the user, but 0-based internally */ #define INDEX(i) ((i)+1) #else /* Use printf in standard C environment, for debugging and statistics output. */ /* Output is generated only if debugging is enabled at compile time, or if */ /* the caller explicitly calls colamd_report or symamd_report. 
*/ #define PRINTF printf /* In C, matrices are 0-based and indices are reported as such in *_report */ #define INDEX(i) (i) #endif /* MATLAB_MEX_FILE */ /* ========================================================================== */ /* === Prototypes of PRIVATE routines ======================================= */ /* ========================================================================== */ PRIVATE int init_rows_cols ( int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int A [], int p [], int stats [COLAMD_STATS] ) ; PRIVATE void init_scoring ( int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int A [], int head [], double knobs [COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg ) ; PRIVATE int find_ordering ( int n_row, int n_col, int Alen, Colamd_Row Row [], Colamd_Col Col [], int A [], int head [], int n_col2, int max_deg, int pfree ) ; PRIVATE void order_children ( int n_col, Colamd_Col Col [], int p [] ) ; PRIVATE void detect_super_cols ( #ifndef NDEBUG int n_col, Colamd_Row Row [], #endif /* NDEBUG */ Colamd_Col Col [], int A [], int head [], int row_start, int row_length ) ; PRIVATE int garbage_collection ( int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int A [], int *pfree ) ; PRIVATE int clear_mark ( int n_row, Colamd_Row Row [] ) ; PRIVATE void print_report ( char *method, int stats [COLAMD_STATS] ) ; /* ========================================================================== */ /* === Debugging prototypes and definitions ================================= */ /* ========================================================================== */ #ifndef NDEBUG /* colamd_debug is the *ONLY* global variable, and is only */ /* present when debugging */ PRIVATE int colamd_debug ; /* debug print level */ #define DEBUG0(params) { (void) PRINTF params ; } #define DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; } #define DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; } #define DEBUG3(params) { 
if (colamd_debug >= 3) (void) PRINTF params ; } #define DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; } #ifdef MATLAB_MEX_FILE #define ASSERT(expression) (mxAssert ((expression), "")) #else #define ASSERT(expression) (assert (expression)) #endif /* MATLAB_MEX_FILE */ PRIVATE void colamd_get_debug /* gets the debug print level from getenv */ ( char *method ) ; PRIVATE void debug_deg_lists ( int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int head [], int min_score, int should, int max_deg ) ; PRIVATE void debug_mark ( int n_row, Colamd_Row Row [], int tag_mark, int max_mark ) ; PRIVATE void debug_matrix ( int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int A [] ) ; PRIVATE void debug_structures ( int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int A [], int n_col2 ) ; #else /* NDEBUG */ /* === No debugging ========================================================= */ #define DEBUG0(params) ; #define DEBUG1(params) ; #define DEBUG2(params) ; #define DEBUG3(params) ; #define DEBUG4(params) ; #define ASSERT(expression) ((void) 0) #endif /* NDEBUG */ /* ========================================================================== */ /* ========================================================================== */ /* === USER-CALLABLE ROUTINES: ============================================== */ /* ========================================================================== */ /* ========================================================================== */ /* === colamd_recommended =================================================== */ /* ========================================================================== */ /* The colamd_recommended routine returns the suggested size for Alen. This value has been determined to provide good balance between the number of garbage collections and the memory requirements for colamd. If any argument is negative, a -1 is returned as an error condition. 
This function is also available as a macro defined in colamd.h, so that
    you can use it for a statically-allocated array size.
*/

PUBLIC int colamd_recommended	/* suggested value of Alen */
(
    /* === Parameters ======================================================= */

    int nnz,			/* entry count of A */
    int n_row,			/* row count of A */
    int n_col			/* column count of A */
)
{
    /* The sizing rule itself lives in the COLAMD_RECOMMENDED macro; this */
    /* routine only makes it callable as a function. */
    int suggested_alen ;

    suggested_alen = COLAMD_RECOMMENDED (nnz, n_row, n_col) ;
    return (suggested_alen) ;
}


/* ========================================================================== */
/* === colamd_set_defaults ================================================== */
/* ========================================================================== */

/*
    The colamd_set_defaults routine sets the default values of the user-
    controllable parameters for colamd:

    knobs [0]	rows with knobs[0]*n_col entries or more are removed
		prior to ordering in colamd.  Rows and columns with
		knobs[0]*n_col entries or more are removed prior to
		ordering in symamd and placed last in the output
		ordering.

    knobs [1]	columns with knobs[1]*n_row entries or more are removed
		prior to ordering in colamd, and placed last in the
		column permutation.  Symamd ignores this knob.

knobs [2..19] unused, but future versions might use this */ PUBLIC void colamd_set_defaults ( /* === Parameters ======================================================= */ double knobs [COLAMD_KNOBS] /* knob array */ ) { /* === Local variables ================================================== */ int i ; if (!knobs) { return ; /* no knobs to initialize */ } for (i = 0 ; i < COLAMD_KNOBS ; i++) { knobs [i] = 0 ; } knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ } /* ========================================================================== */ /* === symamd =============================================================== */ /* ========================================================================== */ PUBLIC int symamd /* return TRUE if OK, FALSE otherwise */ ( /* === Parameters ======================================================= */ int n, /* number of rows and columns of A */ int A [], /* row indices of A */ int p [], /* column pointers of A */ int perm [], /* output permutation, size n+1 */ double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ int stats [COLAMD_STATS], /* output statistics and error codes */ void * (*allocate) (size_t, size_t), /* pointer to calloc (ANSI C) or */ /* mxCalloc (for MATLAB mexFunction) */ void (*release) (void *) /* pointer to free (ANSI C) or */ /* mxFree (for MATLAB mexFunction) */ ) { /* === Local variables ================================================== */ int *count ; /* length of each column of M, and col pointer*/ int *mark ; /* mark array for finding duplicate entries */ int *M ; /* row indices of matrix M */ int Mlen ; /* length of M */ int n_row ; /* number of rows in M */ int nnz ; /* number of entries in A */ int i ; /* row index of A */ int j ; /* column index of A */ int k ; /* row index of M */ int mnz ; /* number of nonzeros in M */ int pp ; /* index into a column of A */ int last_row ; /* last row seen in the 
current column */ int length ; /* number of nonzeros in a column */ double cknobs [COLAMD_KNOBS] ; /* knobs for colamd */ double default_knobs [COLAMD_KNOBS] ; /* default knobs for colamd */ int cstats [COLAMD_STATS] ; /* colamd stats */ #ifndef NDEBUG colamd_get_debug ("symamd") ; #endif /* NDEBUG */ /* === Check the input arguments ======================================== */ if (!stats) { DEBUG0 (("symamd: stats not present\n")) ; return (FALSE) ; } for (i = 0 ; i < COLAMD_STATS ; i++) { stats [i] = 0 ; } stats [COLAMD_STATUS] = COLAMD_OK ; stats [COLAMD_INFO1] = -1 ; stats [COLAMD_INFO2] = -1 ; if (!A) { stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; DEBUG0 (("symamd: A not present\n")) ; return (FALSE) ; } if (!p) /* p is not present */ { stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; DEBUG0 (("symamd: p not present\n")) ; return (FALSE) ; } if (n < 0) /* n must be >= 0 */ { stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; stats [COLAMD_INFO1] = n ; DEBUG0 (("symamd: n negative %d\n", n)) ; return (FALSE) ; } nnz = p [n] ; if (nnz < 0) /* nnz must be >= 0 */ { stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; stats [COLAMD_INFO1] = nnz ; DEBUG0 (("symamd: number of entries negative %d\n", nnz)) ; return (FALSE) ; } if (p [0] != 0) { stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; stats [COLAMD_INFO1] = p [0] ; DEBUG0 (("symamd: p[0] not zero %d\n", p [0])) ; return (FALSE) ; } /* === If no knobs, set default knobs =================================== */ if (!knobs) { colamd_set_defaults (default_knobs) ; knobs = default_knobs ; } /* === Allocate count and mark ========================================== */ count = (int *) ((*allocate) (n+1, sizeof (int))) ; if (!count) { stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; DEBUG0 (("symamd: allocate count (size %d) failed\n", n+1)) ; return (FALSE) ; } mark = (int *) ((*allocate) (n+1, sizeof (int))) ; if (!mark) { stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; (*release) ((void *) 
count) ; DEBUG0 (("symamd: allocate mark (size %d) failed\n", n+1)) ; return (FALSE) ; } /* === Compute column counts of M, check if A is valid ================== */ stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ for (i = 0 ; i < n ; i++) { mark [i] = -1 ; } for (j = 0 ; j < n ; j++) { last_row = -1 ; length = p [j+1] - p [j] ; if (length < 0) { /* column pointers must be non-decreasing */ stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; stats [COLAMD_INFO1] = j ; stats [COLAMD_INFO2] = length ; (*release) ((void *) count) ; (*release) ((void *) mark) ; DEBUG0 (("symamd: col %d negative length %d\n", j, length)) ; return (FALSE) ; } for (pp = p [j] ; pp < p [j+1] ; pp++) { i = A [pp] ; if (i < 0 || i >= n) { /* row index i, in column j, is out of bounds */ stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ; stats [COLAMD_INFO1] = j ; stats [COLAMD_INFO2] = i ; stats [COLAMD_INFO3] = n ; (*release) ((void *) count) ; (*release) ((void *) mark) ; DEBUG0 (("symamd: row %d col %d out of bounds\n", i, j)) ; return (FALSE) ; } if (i <= last_row || mark [i] == j) { /* row index is unsorted or repeated (or both), thus col */ /* is jumbled. This is a notice, not an error condition. 
*/ stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ; stats [COLAMD_INFO1] = j ; stats [COLAMD_INFO2] = i ; (stats [COLAMD_INFO3]) ++ ; DEBUG1 (("symamd: row %d col %d unsorted/duplicate\n", i, j)) ; } if (i > j && mark [i] != j) { /* row k of M will contain column indices i and j */ count [i]++ ; count [j]++ ; } /* mark the row as having been seen in this column */ mark [i] = j ; last_row = i ; } } if (stats [COLAMD_STATUS] == COLAMD_OK) { /* if there are no duplicate entries, then mark is no longer needed */ (*release) ((void *) mark) ; } /* === Compute column pointers of M ===================================== */ /* use output permutation, perm, for column pointers of M */ perm [0] = 0 ; for (j = 1 ; j <= n ; j++) { perm [j] = perm [j-1] + count [j-1] ; } for (j = 0 ; j < n ; j++) { count [j] = perm [j] ; } /* === Construct M ====================================================== */ mnz = perm [n] ; n_row = mnz / 2 ; Mlen = colamd_recommended (mnz, n_row, n) ; M = (int *) ((*allocate) (Mlen, sizeof (int))) ; DEBUG0 (("symamd: M is %d-by-%d with %d entries, Mlen = %d\n", n_row, n, mnz, Mlen)) ; if (!M) { stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ; (*release) ((void *) count) ; (*release) ((void *) mark) ; DEBUG0 (("symamd: allocate M (size %d) failed\n", Mlen)) ; return (FALSE) ; } k = 0 ; if (stats [COLAMD_STATUS] == COLAMD_OK) { /* Matrix is OK */ for (j = 0 ; j < n ; j++) { ASSERT (p [j+1] - p [j] >= 0) ; for (pp = p [j] ; pp < p [j+1] ; pp++) { i = A [pp] ; ASSERT (i >= 0 && i < n) ; if (i > j) { /* row k of M contains column indices i and j */ M [count [i]++] = k ; M [count [j]++] = k ; k++ ; } } } } else { /* Matrix is jumbled. Do not add duplicates to M. Unsorted cols OK. 
*/ DEBUG0 (("symamd: Duplicates in A.\n")) ; for (i = 0 ; i < n ; i++) { mark [i] = -1 ; } for (j = 0 ; j < n ; j++) { ASSERT (p [j+1] - p [j] >= 0) ; for (pp = p [j] ; pp < p [j+1] ; pp++) { i = A [pp] ; ASSERT (i >= 0 && i < n) ; if (i > j && mark [i] != j) { /* row k of M contains column indices i and j */ M [count [i]++] = k ; M [count [j]++] = k ; k++ ; mark [i] = j ; } } } (*release) ((void *) mark) ; } /* count and mark no longer needed */ (*release) ((void *) count) ; ASSERT (k == n_row) ; /* === Adjust the knobs for M =========================================== */ for (i = 0 ; i < COLAMD_KNOBS ; i++) { cknobs [i] = knobs [i] ; } /* there are no dense rows in M */ cknobs [COLAMD_DENSE_ROW] = 1.0 ; if (n_row != 0 && n < n_row) { /* On input, the knob is a fraction of 1..n, the number of rows of A. */ /* Convert it to a fraction of 1..n_row, of the number of rows of M. */ cknobs [COLAMD_DENSE_COL] = (knobs [COLAMD_DENSE_ROW] * n) / n_row ; } else { /* no dense columns in M */ cknobs [COLAMD_DENSE_COL] = 1.0 ; } DEBUG0 (("symamd: dense col knob for M: %g\n", cknobs [COLAMD_DENSE_COL])) ; /* === Order the columns of M =========================================== */ if (!colamd (n_row, n, Mlen, M, perm, cknobs, cstats)) { /* This "cannot" happen, unless there is a bug in the code. 
*/ stats [COLAMD_STATUS] = COLAMD_ERROR_internal_error ; (*release) ((void *) M) ; DEBUG0 (("symamd: internal error!\n")) ; return (FALSE) ; } /* Note that the output permutation is now in perm */ /* === get the statistics for symamd from colamd ======================== */ /* note that a dense column in colamd means a dense row and col in symamd */ stats [COLAMD_DENSE_ROW] = cstats [COLAMD_DENSE_COL] ; stats [COLAMD_DENSE_COL] = cstats [COLAMD_DENSE_COL] ; stats [COLAMD_DEFRAG_COUNT] = cstats [COLAMD_DEFRAG_COUNT] ; /* === Free M =========================================================== */ (*release) ((void *) M) ; DEBUG0 (("symamd: done.\n")) ; return (TRUE) ; } /* ========================================================================== */ /* === colamd =============================================================== */ /* ========================================================================== */ /* The colamd routine computes a column ordering Q of a sparse matrix A such that the LU factorization P(AQ) = LU remains sparse, where P is selected via partial pivoting. The routine can also be viewed as providing a permutation Q such that the Cholesky factorization (AQ)'(AQ) = LL' remains sparse. 
*/ PUBLIC int colamd /* returns TRUE if successful, FALSE otherwise*/ ( /* === Parameters ======================================================= */ int n_row, /* number of rows in A */ int n_col, /* number of columns in A */ int Alen, /* length of A */ int A [], /* row indices of A */ int p [], /* pointers to columns in A */ double knobs [COLAMD_KNOBS],/* parameters (uses defaults if NULL) */ int stats [COLAMD_STATS] /* output statistics and error codes */ ) { /* === Local variables ================================================== */ int i ; /* loop index */ int nnz ; /* nonzeros in A */ int Row_size ; /* size of Row [], in integers */ int Col_size ; /* size of Col [], in integers */ int need ; /* minimum required length of A */ Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ Colamd_Col *Col ; /* pointer into A of Col [0..n_col] array */ int n_col2 ; /* number of non-dense, non-empty columns */ int n_row2 ; /* number of non-dense, non-empty rows */ int ngarbage ; /* number of garbage collections performed */ int max_deg ; /* maximum row degree */ double default_knobs [COLAMD_KNOBS] ; /* default knobs array */ #ifndef NDEBUG colamd_get_debug ("colamd") ; #endif /* NDEBUG */ /* === Check the input arguments ======================================== */ if (!stats) { DEBUG0 (("colamd: stats not present\n")) ; return (FALSE) ; } for (i = 0 ; i < COLAMD_STATS ; i++) { stats [i] = 0 ; } stats [COLAMD_STATUS] = COLAMD_OK ; stats [COLAMD_INFO1] = -1 ; stats [COLAMD_INFO2] = -1 ; if (!A) /* A is not present */ { stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; DEBUG0 (("colamd: A not present\n")) ; return (FALSE) ; } if (!p) /* p is not present */ { stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; DEBUG0 (("colamd: p not present\n")) ; return (FALSE) ; } if (n_row < 0) /* n_row must be >= 0 */ { stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ; stats [COLAMD_INFO1] = n_row ; DEBUG0 (("colamd: nrow negative %d\n", n_row)) ; return (FALSE) ; } if 
(n_col < 0) /* n_col must be >= 0 */ { stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; stats [COLAMD_INFO1] = n_col ; DEBUG0 (("colamd: ncol negative %d\n", n_col)) ; return (FALSE) ; } nnz = p [n_col] ; if (nnz < 0) /* nnz must be >= 0 */ { stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; stats [COLAMD_INFO1] = nnz ; DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ; return (FALSE) ; } if (p [0] != 0) { stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; stats [COLAMD_INFO1] = p [0] ; DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ; return (FALSE) ; } /* === If no knobs, set default knobs =================================== */ if (!knobs) { colamd_set_defaults (default_knobs) ; knobs = default_knobs ; } /* === Allocate the Row and Col arrays from array A ===================== */ Col_size = COLAMD_C (n_col) ; Row_size = COLAMD_R (n_row) ; need = 2*nnz + n_col + Col_size + Row_size ; if (need > Alen) { /* not enough space in array A to perform the ordering */ stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ; stats [COLAMD_INFO1] = need ; stats [COLAMD_INFO2] = Alen ; DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); return (FALSE) ; } Alen -= Col_size + Row_size ; Col = (Colamd_Col *) &A [Alen] ; Row = (Colamd_Row *) &A [Alen + Col_size] ; /* === Construct the row and column data structures ===================== */ if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) { /* input matrix is invalid */ DEBUG0 (("colamd: Matrix invalid\n")) ; return (FALSE) ; } /* === Initialize scores, kill dense rows/columns ======================= */ init_scoring (n_row, n_col, Row, Col, A, p, knobs, &n_row2, &n_col2, &max_deg) ; /* === Order the supercolumns =========================================== */ ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p, n_col2, max_deg, 2*nnz) ; /* === Order the non-principal columns ================================== */ order_children (n_col, Col, p) ; /* === Return statistics in stats 
======================================= */ stats [COLAMD_DENSE_ROW] = n_row - n_row2 ; stats [COLAMD_DENSE_COL] = n_col - n_col2 ; stats [COLAMD_DEFRAG_COUNT] = ngarbage ; DEBUG0 (("colamd: done.\n")) ; return (TRUE) ; } /* ========================================================================== */ /* === colamd_report ======================================================== */ /* ========================================================================== */ PUBLIC void colamd_report ( int stats [COLAMD_STATS] ) { print_report ("colamd", stats) ; } /* ========================================================================== */ /* === symamd_report ======================================================== */ /* ========================================================================== */ PUBLIC void symamd_report ( int stats [COLAMD_STATS] ) { print_report ("symamd", stats) ; } /* ========================================================================== */ /* === NON-USER-CALLABLE ROUTINES: ========================================== */ /* ========================================================================== */ /* There are no user-callable routines beyond this point in the file */ /* ========================================================================== */ /* === init_rows_cols ======================================================= */ /* ========================================================================== */ /* Takes the column form of the matrix in A and creates the row form of the matrix. Also, row and column attributes are stored in the Col and Row structs. If the columns are un-sorted or contain duplicate row indices, this routine will also sort and remove duplicate row indices from the column form of the matrix. Returns FALSE if the matrix is invalid, TRUE otherwise. Not user-callable. 
*/

/*
    How the arguments are used by the code below:

    Row, Col	every entry 0..n_row-1 / 0..n_col-1 is initialized here.
    A		on input holds the column form in A [p [col] .. p [col+1]-1].
		The row form is written starting at A [p [n_col]].  If the
		matrix is jumbled, the column form is then rebuilt from the
		row form, starting at A [0].
    p		read as the column pointers.  It is overwritten only in the
		jumbled case, where it serves as scratch while the column
		form is rebuilt.
    stats	COLAMD_STATUS, COLAMD_INFO1..3 are set on error, or when the
		matrix is found to be jumbled (a notice, not an error).
*/

PRIVATE int init_rows_cols	/* returns TRUE if OK, or FALSE otherwise */
(
    /* === Parameters ======================================================= */

    int n_row,			/* number of rows of A */
    int n_col,			/* number of columns of A */
    Colamd_Row Row [],		/* of size n_row+1 */
    Colamd_Col Col [],		/* of size n_col+1 */
    int A [],			/* row indices of A, of size Alen */
    int p [],			/* pointers to columns in A, of size n_col+1 */
    int stats [COLAMD_STATS]	/* colamd statistics */
)
{
    /* === Local variables ================================================== */

    int col ;			/* a column index */
    int row ;			/* a row index */
    int *cp ;			/* a column pointer */
    int *cp_end ;		/* a pointer to the end of a column */
    int *rp ;			/* a row pointer */
    int *rp_end ;		/* a pointer to the end of a row */
    int last_row ;		/* previous row */

    /* === Initialize columns, and check column pointers ==================== */

    for (col = 0 ; col < n_col ; col++)
    {
	Col [col].start = p [col] ;
	Col [col].length = p [col+1] - p [col] ;

	if (Col [col].length < 0)
	{
	    /* column pointers must be non-decreasing */
	    stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
	    stats [COLAMD_INFO1] = col ;
	    stats [COLAMD_INFO2] = Col [col].length ;
	    DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ;
	    return (FALSE) ;
	}

	/* each column starts as its own column of thickness 1, with no */
	/* score yet and not linked into any degree list */
	Col [col].shared1.thickness = 1 ;
	Col [col].shared2.score = 0 ;
	Col [col].shared3.prev = EMPTY ;
	Col [col].shared4.degree_next = EMPTY ;
    }

    /* p [0..n_col] no longer needed, used as "head" in subsequent routines */

    /* === Scan columns, compute row degrees, and check row indices ========= */

    stats [COLAMD_INFO3] = 0 ;	/* number of duplicate or unsorted row indices*/

    /* a mark of -1 means "not yet seen in any column" */
    for (row = 0 ; row < n_row ; row++)
    {
	Row [row].length = 0 ;
	Row [row].shared2.mark = -1 ;
    }

    for (col = 0 ; col < n_col ; col++)
    {
	last_row = -1 ;

	cp = &A [p [col]] ;
	cp_end = &A [p [col+1]] ;

	while (cp < cp_end)
	{
	    row = *cp++ ;

	    /* make sure row indices within range */
	    if (row < 0 || row >= n_row)
	    {
		stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
		stats [COLAMD_INFO1] = col ;
		stats [COLAMD_INFO2] = row ;
		stats [COLAMD_INFO3] = n_row ;
		DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
		return (FALSE) ;
	    }

	    if (row <= last_row || Row [row].shared2.mark == col)
	    {
		/* row indices are unsorted or repeated (or both), thus col */
		/* is jumbled.  This is a notice, not an error condition. */
		stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
		stats [COLAMD_INFO1] = col ;
		stats [COLAMD_INFO2] = row ;
		(stats [COLAMD_INFO3]) ++ ;
		DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
	    }

	    if (Row [row].shared2.mark != col)
	    {
		/* first time this row is seen in this column */
		Row [row].length++ ;
	    }
	    else
	    {
		/* this is a repeated entry in the column, */
		/* it will be removed */
		Col [col].length-- ;
	    }

	    /* mark the row as having been seen in this column */
	    Row [row].shared2.mark = col ;

	    last_row = row ;
	}
    }

    /* === Compute row pointers ============================================= */

    /* row form of the matrix starts directly after the column */
    /* form of matrix in A */
    Row [0].start = p [n_col] ;
    Row [0].shared1.p = Row [0].start ;
    Row [0].shared2.mark = -1 ;
    for (row = 1 ; row < n_row ; row++)
    {
	/* shared1.p is the next free slot of the row while it is filled */
	Row [row].start = Row [row-1].start + Row [row-1].length ;
	Row [row].shared1.p = Row [row].start ;
	Row [row].shared2.mark = -1 ;
    }

    /* === Create row form ================================================== */

    if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
    {
	/* if cols jumbled, watch for repeated row indices */
	for (col = 0 ; col < n_col ; col++)
	{
	    cp = &A [p [col]] ;
	    cp_end = &A [p [col+1]] ;
	    while (cp < cp_end)
	    {
		row = *cp++ ;
		if (Row [row].shared2.mark != col)
		{
		    /* append col to this row, once per (row, col) pair */
		    A [(Row [row].shared1.p)++] = col ;
		    Row [row].shared2.mark = col ;
		}
	    }
	}
    }
    else
    {
	/* if cols not jumbled, we don't need the mark (this is faster) */
	for (col = 0 ; col < n_col ; col++)
	{
	    cp = &A [p [col]] ;
	    cp_end = &A [p [col+1]] ;
	    while (cp < cp_end)
	    {
		A [(Row [*cp++].shared1.p)++] = col ;
	    }
	}
    }

    /* === Clear the row marks and set row degrees ========================== */

    for (row = 0 ; row < n_row ; row++)
    {
	Row [row].shared2.mark = 0 ;
	Row [row].shared1.degree = Row [row].length ;
    }

    /* === See if we need to re-create columns ============================== */

    if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
    {
	DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;

#ifndef NDEBUG
	/* make sure column lengths are correct: count down from the */
	/* pruned length once per row-form entry; every column must hit 0 */
	for (col = 0 ; col < n_col ; col++)
	{
	    p [col] = Col [col].length ;
	}
	for (row = 0 ; row < n_row ; row++)
	{
	    rp = &A [Row [row].start] ;
	    rp_end = rp + Row [row].length ;
	    while (rp < rp_end)
	    {
		p [*rp++]-- ;
	    }
	}
	for (col = 0 ; col < n_col ; col++)
	{
	    ASSERT (p [col] == 0) ;
	}
	/* now p is all zero (different than when debugging is turned off) */
#endif /* NDEBUG */

	/* === Compute col pointers ========================================= */

	/* col form of the matrix starts at A [0]. */
	/* Note, we may have a gap between the col form and the row */
	/* form if there were duplicate entries, if so, it will be */
	/* removed upon the first garbage collection */
	Col [0].start = 0 ;
	p [0] = Col [0].start ;
	for (col = 1 ; col < n_col ; col++)
	{
	    /* note that the lengths here are for pruned columns, i.e. */
	    /* no duplicate row indices will exist for these columns */
	    Col [col].start = Col [col-1].start + Col [col-1].length ;
	    p [col] = Col [col].start ;
	}

	/* === Re-create col form =========================================== */

	/* rows are visited in increasing order, so each rebuilt column */
	/* receives its row indices in increasing order; p [col] is the */
	/* next free slot of column col */
	for (row = 0 ; row < n_row ; row++)
	{
	    rp = &A [Row [row].start] ;
	    rp_end = rp + Row [row].length ;
	    while (rp < rp_end)
	    {
		A [(p [*rp++])++] = row ;
	    }
	}
    }

    /* === Done.  Matrix is not (or no longer) jumbled ====================== */

    return (TRUE) ;
}


/* ========================================================================== */
/* === init_scoring ========================================================= */
/* ========================================================================== */

/*
    Kills dense or empty columns and rows, calculates an initial score for
    each column, and places all columns in the degree lists.  Not user-callable.

    Killed columns are given their final position at the end of the ordering
    (Col [c].shared2.order counts down from n_col-1).  On return, *p_n_col2
    and *p_n_row2 hold the number of columns and rows that were not killed,
    and *p_max_deg holds the largest degree among the surviving rows.
*/

PRIVATE void init_scoring
(
    /* === Parameters ======================================================= */

    int n_row,			/* number of rows of A */
    int n_col,			/* number of columns of A */
    Colamd_Row Row [],		/* of size n_row+1 */
    Colamd_Col Col [],		/* of size n_col+1 */
    int A [],			/* column form and row form of A */
    int head [],		/* of size n_col+1 */
    double knobs [COLAMD_KNOBS],/* parameters */
    int *p_n_row2,		/* number of non-dense, non-empty rows */
    int *p_n_col2,		/* number of non-dense, non-empty columns */
    int *p_max_deg		/* maximum row degree */
)
{
    /* === Local variables ================================================== */

    int c ;			/* a column index */
    int r, row ;		/* a row index */
    int *cp ;			/* a column pointer */
    int deg ;			/* degree of a row or column */
    int *cp_end ;		/* a pointer to the end of a column */
    int *new_cp ;		/* new column pointer */
    int col_length ;		/* length of pruned column */
    int score ;			/* current column score */
    int n_col2 ;		/* number of non-dense, non-empty columns */
    int n_row2 ;		/* number of non-dense, non-empty rows */
    int dense_row_count ;	/* remove rows with more entries than this */
    int dense_col_count ;	/* remove cols with more entries than this */
    int min_score ;		/* smallest column score */
    int max_deg ;		/* maximum row degree */
    int next_col ;		/* Used to add to degree list.*/

#ifndef NDEBUG
    int debug_count ;		/* debug only. */
#endif /* NDEBUG */

    /* === Extract knobs ==================================================== */

    /* each knob is a fraction of the dimension; the product is clamped to */
    /* the range 0..dimension and converted to int by the assignment */
    dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
    dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
    DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
    max_deg = 0 ;
    n_col2 = n_col ;
    n_row2 = n_row ;

    /* === Kill empty columns =============================================== */

    /* Put the empty columns at the end in their natural order, so that LU */
    /* factorization can proceed as far as possible. */
    for (c = n_col-1 ; c >= 0 ; c--)
    {
	deg = Col [c].length ;
	if (deg == 0)
	{
	    /* this is an empty column, kill and order it last */
	    Col [c].shared2.order = --n_col2 ;
	    KILL_PRINCIPAL_COL (c) ;
	}
    }
    DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;

    /* === Kill dense columns =============================================== */

    /* Put the dense columns at the end, in their natural order */
    for (c = n_col-1 ; c >= 0 ; c--)
    {
	/* skip any dead columns */
	if (COL_IS_DEAD (c))
	{
	    continue ;
	}
	deg = Col [c].length ;
	if (deg > dense_col_count)
	{
	    /* this is a dense column, kill and order it last */
	    Col [c].shared2.order = --n_col2 ;
	    /* decrement the row degrees */
	    /* (must happen before the kill, which overwrites Col [c].start) */
	    cp = &A [Col [c].start] ;
	    cp_end = cp + Col [c].length ;
	    while (cp < cp_end)
	    {
		Row [*cp++].shared1.degree-- ;
	    }
	    KILL_PRINCIPAL_COL (c) ;
	}
    }
    DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;

    /* === Kill dense and empty rows ======================================== */

    for (r = 0 ; r < n_row ; r++)
    {
	deg = Row [r].shared1.degree ;
	ASSERT (deg >= 0 && deg <= n_col) ;
	if (deg > dense_row_count || deg == 0)
	{
	    /* kill a dense or empty row */
	    KILL_ROW (r) ;
	    --n_row2 ;
	}
	else
	{
	    /* keep track of max degree of remaining rows */
	    max_deg = MAX (max_deg, deg) ;
	}
    }
    DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;

    /* === Compute initial column scores ==================================== */

    /* At this point the row degrees are accurate.  They reflect the number */
    /* of "live" (non-dense) columns in each row.  No empty rows exist. */
    /* Some "live" columns may contain only dead rows, however.  These are */
    /* pruned in the code below. */

    /* now find the initial matlab score for each column */
    for (c = n_col-1 ; c >= 0 ; c--)
    {
	/* skip dead column */
	if (COL_IS_DEAD (c))
	{
	    continue ;
	}
	score = 0 ;
	cp = &A [Col [c].start] ;
	new_cp = cp ;		/* write position; never runs ahead of cp */
	cp_end = cp + Col [c].length ;
	while (cp < cp_end)
	{
	    /* get a row */
	    row = *cp++ ;
	    /* skip if dead */
	    if (ROW_IS_DEAD (row))
	    {
		continue ;
	    }
	    /* compact the column */
	    *new_cp++ = row ;
	    /* add row's external degree */
	    score += Row [row].shared1.degree - 1 ;
	    /* guard against integer overflow */
	    score = MIN (score, n_col) ;
	}
	/* determine pruned column length */
	col_length = (int) (new_cp - &A [Col [c].start]) ;
	if (col_length == 0)
	{
	    /* a newly-made null column (all rows in this col are "dense" */
	    /* and have already been killed) */
	    DEBUG2 (("Newly null killed: %d\n", c)) ;
	    Col [c].shared2.order = --n_col2 ;
	    KILL_PRINCIPAL_COL (c) ;
	}
	else
	{
	    /* set column length and set score */
	    ASSERT (score >= 0) ;
	    ASSERT (score <= n_col) ;
	    Col [c].length = col_length ;
	    Col [c].shared2.score = score ;
	}
    }
    DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
    	n_col-n_col2)) ;

    /* At this point, all empty rows and columns are dead.  All live columns */
    /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
    /* yet).  Rows may contain dead columns, but all live rows contain at */
    /* least one live column. */

#ifndef NDEBUG
    debug_structures (n_row, n_col, Row, Col, A, n_col2) ;
#endif /* NDEBUG */

    /* === Initialize degree lists ========================================== */

#ifndef NDEBUG
    debug_count = 0 ;
#endif /* NDEBUG */

    /* clear the hash buckets */
    /* (scores are capped at n_col above, so head [0..n_col] covers them) */
    for (c = 0 ; c <= n_col ; c++)
    {
	head [c] = EMPTY ;
    }
    min_score = n_col ;
    /* place in reverse order, so low column indices are at the front */
    /* of the lists.  This is to encourage natural tie-breaking */
    for (c = n_col-1 ; c >= 0 ; c--)
    {
	/* only add principal columns to degree lists */
	if (COL_IS_ALIVE (c))
	{
	    DEBUG4 (("place %d score %d minscore %d ncol %d\n",
		c, Col [c].shared2.score, min_score, n_col)) ;

	    /* === Add columns score to DList =============================== */

	    score = Col [c].shared2.score ;

	    ASSERT (min_score >= 0) ;
	    ASSERT (min_score <= n_col) ;
	    ASSERT (score >= 0) ;
	    ASSERT (score <= n_col) ;
	    ASSERT (head [score] >= EMPTY) ;

	    /* now add this column to dList at proper score location */
	    /* (pushed onto the front of the doubly-linked list) */
	    next_col = head [score] ;
	    Col [c].shared3.prev = EMPTY ;
	    Col [c].shared4.degree_next = next_col ;

	    /* if there already was a column with the same score, set its */
	    /* previous pointer to this new column */
	    if (next_col != EMPTY)
	    {
		Col [next_col].shared3.prev = c ;
	    }
	    head [score] = c ;

	    /* see if this score is less than current min */
	    min_score = MIN (min_score, score) ;

#ifndef NDEBUG
	    debug_count++ ;
#endif /* NDEBUG */

	}
    }

#ifndef NDEBUG
    DEBUG1 (("colamd: Live cols %d out of %d, non-princ: %d\n",
	debug_count, n_col, n_col-debug_count)) ;
    ASSERT (debug_count == n_col2) ;
    debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ;
#endif /* NDEBUG */

    /* === Return number of remaining columns, and max row degree =========== */

    *p_n_col2 = n_col2 ;
    *p_n_row2 = n_row2 ;
    *p_max_deg = max_deg ;
}


/* ========================================================================== */
/* === find_ordering ======================================================== */
/* ========================================================================== */

/*
    Order the principal columns of the supercolumn form of the matrix
    (no supercolumns on input).  Uses a minimum approximate column minimum
    degree ordering method.  Not user-callable.
*/

/*
    find_ordering: main elimination loop.

    Each pass of the outer loop:
      1. takes the column at the head of the lowest non-empty degree list
	 as the pivot column and gives it order k (k then advances by the
	 column's thickness);
      2. calls garbage_collection (and clear_mark) when
	 pfree + needed_memory >= Alen;
      3. builds the pivot row at A [pfree ...] as the union of the live rows
	 found in the pivot column, tagging each added column by negating
	 its thickness;
      4. kills every row listed in the pivot column;
      5. for each column of the pivot row: unlinks it from its degree list,
	 updates the marks of its rows with set differences (a row whose set
	 difference reaches zero is killed), then sums the set differences
	 into a score while compacting the column;
      6. orders columns that became empty immediately ("mass elimination")
	 and places the remaining ones in hash buckets, then calls
	 detect_super_cols;
      7. kills the pivot column, advances tag_mark, appends the pivot row to
	 every surviving column, and puts those columns back in the degree
	 lists with their final scores;
      8. re-initialises the pivot row if pivot_row_degree > 0.

    pfree is passed by value; only the local copy is advanced.
    Returns the number of times garbage_collection was called.
*/

PRIVATE int find_ordering	/* return the number of garbage collections */
(
    /* === Parameters ======================================================= */

    int n_row,			/* number of rows of A */
    int n_col,			/* number of columns of A */
    int Alen,			/* size of A, 2*nnz + n_col or larger */
    Colamd_Row Row [],		/* of size n_row+1 */
    Colamd_Col Col [],		/* of size n_col+1 */
    int A [],			/* column form and row form of A */
    int head [],		/* of size n_col+1 */
    int n_col2,			/* Remaining columns to order */
    int max_deg,		/* Maximum row degree */
    int pfree			/* index of first free slot (2*nnz on entry) */
)
{
    /* === Local variables ================================================== */

    int k ;			/* current pivot ordering step */
    int pivot_col ;		/* current pivot column */
    int *cp ;			/* a column pointer */
    int *rp ;			/* a row pointer */
    int pivot_row ;		/* current pivot row */
    int *new_cp ;		/* modified column pointer */
    int *new_rp ;		/* modified row pointer */
    int pivot_row_start ;	/* pointer to start of pivot row */
    int pivot_row_degree ;	/* number of columns in pivot row */
    int pivot_row_length ;	/* number of supercolumns in pivot row */
    int pivot_col_score ;	/* score of pivot column */
    int needed_memory ;		/* free space needed for pivot row */
    int *cp_end ;		/* pointer to the end of a column */
    int *rp_end ;		/* pointer to the end of a row */
    int row ;			/* a row index */
    int col ;			/* a column index */
    int max_score ;		/* maximum possible score */
    int cur_score ;		/* score of current column */
    unsigned int hash ;		/* hash value for supernode detection */
    int head_column ;		/* head of hash bucket */
    int first_col ;		/* first column in hash bucket */
    int tag_mark ;		/* marker value for mark array */
    int row_mark ;		/* Row [row].shared2.mark */
    int set_difference ;	/* set difference size of row with pivot row */
    int min_score ;		/* smallest column score */
    int col_thickness ;		/* "thickness" (no. of columns in a supercol) */
    int max_mark ;		/* maximum value of tag_mark */
    int pivot_col_thickness ;	/* number of columns represented by pivot col */
    int prev_col ;		/* Used by Dlist operations. */
    int next_col ;		/* Used by Dlist operations. */
    int ngarbage ;		/* number of garbage collections performed */

#ifndef NDEBUG
    int debug_d ;		/* debug loop counter */
    int debug_step = 0 ;	/* debug loop counter */
#endif /* NDEBUG */

    /* === Initialization and clear mark ==================================== */

    /* tag_mark is reset once it reaches max_mark (see "Clear mark" below) */
    max_mark = INT_MAX - n_col ;	/* INT_MAX defined in <limits.h> */
    tag_mark = clear_mark (n_row, Row) ;
    min_score = 0 ;
    ngarbage = 0 ;
    DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;

    /* === Order the columns ================================================ */

    for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
    {

#ifndef NDEBUG
	if (debug_step % 100 == 0)
	{
	    DEBUG2 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ;
	}
	else
	{
	    DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
	}
	debug_step++ ;
	debug_deg_lists (n_row, n_col, Row, Col, head,
		min_score, n_col2-k, max_deg) ;
	debug_matrix (n_row, n_col, Row, Col, A) ;
#endif /* NDEBUG */

	/* === Select pivot column, and order it ============================ */

	/* make sure degree list isn't empty */
	ASSERT (min_score >= 0) ;
	ASSERT (min_score <= n_col) ;
	ASSERT (head [min_score] >= EMPTY) ;

#ifndef NDEBUG
	for (debug_d = 0 ; debug_d < min_score ; debug_d++)
	{
	    ASSERT (head [debug_d] == EMPTY) ;
	}
#endif /* NDEBUG */

	/* get pivot column from head of minimum degree list */
	/* (min_score is only a lower bound: scan upward to the first */
	/* non-empty list) */
	while (head [min_score] == EMPTY && min_score < n_col)
	{
	    min_score++ ;
	}
	pivot_col = head [min_score] ;
	ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
	/* unlink the pivot column from the front of its degree list */
	next_col = Col [pivot_col].shared4.degree_next ;
	head [min_score] = next_col ;
	if (next_col != EMPTY)
	{
	    Col [next_col].shared3.prev = EMPTY ;
	}

	ASSERT (COL_IS_ALIVE (pivot_col)) ;
	DEBUG3 (("Pivot col: %d\n", pivot_col)) ;

	/* remember score for defrag check */
	pivot_col_score = Col [pivot_col].shared2.score ;

	/* the pivot column is the kth column in the pivot order */
	Col [pivot_col].shared2.order = k ;

	/* increment order count by column thickness */
	pivot_col_thickness = Col [pivot_col].shared1.thickness ;
	k += pivot_col_thickness ;
	ASSERT (pivot_col_thickness > 0) ;

	/* === Garbage_collection, if necessary ============================= */

	needed_memory = MIN (pivot_col_score, n_col - k) ;
	if (pfree + needed_memory >= Alen)
	{
	    pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
	    ngarbage++ ;
	    /* after garbage collection we will have enough */
	    ASSERT (pfree + needed_memory < Alen) ;
	    /* garbage collection has wiped out the Row[].shared2.mark array */
	    tag_mark = clear_mark (n_row, Row) ;

#ifndef NDEBUG
	    debug_matrix (n_row, n_col, Row, Col, A) ;
#endif /* NDEBUG */
	}

	/* === Compute pivot row pattern ==================================== */

	/* get starting location for this new merged row */
	pivot_row_start = pfree ;

	/* initialize new row counts to zero */
	pivot_row_degree = 0 ;

	/* tag pivot column as having been visited so it isn't included */
	/* in merged pivot row */
	Col [pivot_col].shared1.thickness = -pivot_col_thickness ;

	/* pivot row is the union of all rows in the pivot column pattern */
	cp = &A [Col [pivot_col].start] ;
	cp_end = cp + Col [pivot_col].length ;
	while (cp < cp_end)
	{
	    /* get a row */
	    row = *cp++ ;
	    DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ;
	    /* skip if row is dead */
	    if (ROW_IS_DEAD (row))
	    {
		continue ;
	    }
	    rp = &A [Row [row].start] ;
	    rp_end = rp + Row [row].length ;
	    while (rp < rp_end)
	    {
		/* get a column */
		col = *rp++ ;
		/* add the column, if alive and untagged */
		/* (a negative thickness means "already in the pivot row") */
		col_thickness = Col [col].shared1.thickness ;
		if (col_thickness > 0 && COL_IS_ALIVE (col))
		{
		    /* tag column in pivot row */
		    Col [col].shared1.thickness = -col_thickness ;
		    ASSERT (pfree < Alen) ;
		    /* place column in pivot row */
		    A [pfree++] = col ;
		    pivot_row_degree += col_thickness ;
		}
	    }
	}

	/* clear tag on pivot column */
	Col [pivot_col].shared1.thickness = pivot_col_thickness ;
	max_deg = MAX (max_deg, pivot_row_degree) ;

#ifndef NDEBUG
	DEBUG3 (("check2\n")) ;
	debug_mark (n_row, Row, tag_mark, max_mark) ;
#endif /* NDEBUG */

	/* === Kill all rows used to construct pivot row ==================== */

	/* also kill pivot row, temporarily */
	cp = &A [Col [pivot_col].start] ;
	cp_end = cp + Col [pivot_col].length ;
	while (cp < cp_end)
	{
	    /* may be killing an already dead row */
	    row = *cp++ ;
	    DEBUG3 (("Kill row in pivot col: %d\n", row)) ;
	    KILL_ROW (row) ;
	}

	/* === Select a row index to use as the new pivot row =============== */

	pivot_row_length = pfree - pivot_row_start ;
	if (pivot_row_length > 0)
	{
	    /* pick the "pivot" row arbitrarily (first row in col) */
	    pivot_row = A [Col [pivot_col].start] ;
	    DEBUG3 (("Pivotal row is %d\n", pivot_row)) ;
	}
	else
	{
	    /* there is no pivot row, since it is of zero length */
	    pivot_row = EMPTY ;
	    ASSERT (pivot_row_length == 0) ;
	}
	ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;

	/* === Approximate degree computation =============================== */

	/* Here begins the computation of the approximate degree.  The column */
	/* score is the sum of the pivot row "length", plus the size of the */
	/* set differences of each row in the column minus the pattern of the */
	/* pivot row itself.  The column ("thickness") itself is also */
	/* excluded from the column score (we thus use an approximate */
	/* external degree). */

	/* The time taken by the following code (compute set differences, and */
	/* add them up) is proportional to the size of the data structure */
	/* being scanned - that is, the sum of the sizes of each column in */
	/* the pivot row.  Thus, the amortized time to compute a column score */
	/* is proportional to the size of that column (where size, in this */
	/* context, is the column "length", or the number of row indices */
	/* in that column).  The number of row indices in a column is */
	/* monotonically non-decreasing, from the length of the original */
	/* column on input to colamd. */

	/* === Compute set differences ====================================== */

	DEBUG3 (("** Computing set differences phase. **\n")) ;

	/* pivot row is currently dead - it will be revived later. */

	DEBUG3 (("Pivot row: ")) ;
	/* for each column in pivot row */
	rp = &A [pivot_row_start] ;
	rp_end = rp + pivot_row_length ;
	while (rp < rp_end)
	{
	    col = *rp++ ;
	    ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
	    DEBUG3 (("Col: %d\n", col)) ;

	    /* clear tags used to construct pivot row pattern */
	    col_thickness = -Col [col].shared1.thickness ;
	    ASSERT (col_thickness > 0) ;
	    Col [col].shared1.thickness = col_thickness ;

	    /* === Remove column from degree list =========================== */

	    cur_score = Col [col].shared2.score ;
	    prev_col = Col [col].shared3.prev ;
	    next_col = Col [col].shared4.degree_next ;
	    ASSERT (cur_score >= 0) ;
	    ASSERT (cur_score <= n_col) ;
	    ASSERT (cur_score >= EMPTY) ;
	    if (prev_col == EMPTY)
	    {
		head [cur_score] = next_col ;
	    }
	    else
	    {
		Col [prev_col].shared4.degree_next = next_col ;
	    }
	    if (next_col != EMPTY)
	    {
		Col [next_col].shared3.prev = prev_col ;
	    }

	    /* === Scan the column ========================================== */

	    cp = &A [Col [col].start] ;
	    cp_end = cp + Col [col].length ;
	    while (cp < cp_end)
	    {
		/* get a row */
		row = *cp++ ;
		row_mark = Row [row].shared2.mark ;
		/* skip if dead */
		if (ROW_IS_MARKED_DEAD (row_mark))
		{
		    continue ;
		}
		ASSERT (row != pivot_row) ;
		set_difference = row_mark - tag_mark ;
		/* check if the row has been seen yet */
		/* (a mark below tag_mark was not written during this pass) */
		if (set_difference < 0)
		{
		    ASSERT (Row [row].shared1.degree <= max_deg) ;
		    set_difference = Row [row].shared1.degree ;
		}
		/* subtract column thickness from this row's set difference */
		set_difference -= col_thickness ;
		ASSERT (set_difference >= 0) ;
		/* absorb this row if the set difference becomes zero */
		if (set_difference == 0)
		{
		    DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
		    KILL_ROW (row) ;
		}
		else
		{
		    /* save the new mark */
		    Row [row].shared2.mark = set_difference + tag_mark ;
		}
	    }
	}

#ifndef NDEBUG
	debug_deg_lists (n_row, n_col, Row, Col, head,
		min_score, n_col2-k-pivot_row_degree, max_deg) ;
#endif /* NDEBUG */

	/* === Add up set differences for each column ======================= */

	DEBUG3 (("** Adding set differences phase. **\n")) ;

	/* for each column in pivot row */
	rp = &A [pivot_row_start] ;
	rp_end = rp + pivot_row_length ;
	while (rp < rp_end)
	{
	    /* get a column */
	    col = *rp++ ;
	    ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
	    hash = 0 ;
	    cur_score = 0 ;
	    cp = &A [Col [col].start] ;
	    /* compact the column */
	    new_cp = cp ;
	    cp_end = cp + Col [col].length ;

	    DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ;

	    while (cp < cp_end)
	    {
		/* get a row */
		row = *cp++ ;
		ASSERT(row >= 0 && row < n_row) ;
		row_mark = Row [row].shared2.mark ;
		/* skip if dead */
		if (ROW_IS_MARKED_DEAD (row_mark))
		{
		    continue ;
		}
		ASSERT (row_mark > tag_mark) ;
		/* compact the column */
		*new_cp++ = row ;
		/* compute hash function */
		/* (the hash is the sum of the surviving row indices) */
		hash += row ;
		/* add set difference */
		cur_score += row_mark - tag_mark ;
		/* integer overflow... */
		cur_score = MIN (cur_score, n_col) ;
	    }

	    /* recompute the column's length */
	    Col [col].length = (int) (new_cp - &A [Col [col].start]) ;

	    /* === Further mass elimination ================================= */

	    if (Col [col].length == 0)
	    {
		DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
		/* nothing left but the pivot row in this column */
		KILL_PRINCIPAL_COL (col) ;
		pivot_row_degree -= Col [col].shared1.thickness ;
		ASSERT (pivot_row_degree >= 0) ;
		/* order it */
		Col [col].shared2.order = k ;
		/* increment order count by column thickness */
		k += Col [col].shared1.thickness ;
	    }
	    else
	    {
		/* === Prepare for supercolumn detection ==================== */

		DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;

		/* save score so far */
		Col [col].shared2.score = cur_score ;

		/* add column to hash table, for supercolumn detection */
		hash %= n_col + 1 ;

		DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
		ASSERT (hash <= n_col) ;

		head_column = head [hash] ;
		if (head_column > EMPTY)
		{
		    /* degree list "hash" is non-empty, use prev (shared3) of */
		    /* first column in degree list as head of hash bucket */
		    first_col = Col [head_column].shared3.headhash ;
		    Col [head_column].shared3.headhash = col ;
		}
		else
		{
		    /* degree list "hash" is empty, use head as hash bucket */
		    /* (a bucket head c is stored in head [] as -(c+2)) */
		    first_col = - (head_column + 2) ;
		    head [hash] = - (col + 2) ;
		}
		Col [col].shared4.hash_next = first_col ;

		/* save hash function in Col [col].shared3.hash */
		Col [col].shared3.hash = (int) hash ;
		ASSERT (COL_IS_ALIVE (col)) ;
	    }
	}

	/* The approximate external column degree is now computed.  */

	/* === Supercolumn detection ======================================== */

	DEBUG3 (("** Supercolumn detection phase. **\n")) ;

	detect_super_cols (

#ifndef NDEBUG
		n_col, Row,
#endif /* NDEBUG */

		Col, A, head, pivot_row_start, pivot_row_length) ;

	/* === Kill the pivotal column ====================================== */

	KILL_PRINCIPAL_COL (pivot_col) ;

	/* === Clear mark =================================================== */

	/* advance tag_mark by max_deg + 1; reset all marks once it */
	/* reaches max_mark */
	tag_mark += (max_deg + 1) ;
	if (tag_mark >= max_mark)
	{
	    DEBUG2 (("clearing tag_mark\n")) ;
	    tag_mark = clear_mark (n_row, Row) ;
	}

#ifndef NDEBUG
	DEBUG3 (("check3\n")) ;
	debug_mark (n_row, Row, tag_mark, max_mark) ;
#endif /* NDEBUG */

	/* === Finalize the new pivot row, and column scores ================ */

	DEBUG3 (("** Finalize scores phase. **\n")) ;

	/* for each column in pivot row */
	rp = &A [pivot_row_start] ;
	/* compact the pivot row */
	new_rp = rp ;
	rp_end = rp + pivot_row_length ;
	while (rp < rp_end)
	{
	    col = *rp++ ;
	    /* skip dead columns */
	    if (COL_IS_DEAD (col))
	    {
		continue ;
	    }
	    *new_rp++ = col ;
	    /* add new pivot row to column */
	    /* NOTE(review): writes one slot past the column's current end; */
	    /* presumably a slot is free because the killed pivot-column rows */
	    /* were compacted out of this column above - confirm */
	    A [Col [col].start + (Col [col].length++)] = pivot_row ;

	    /* retrieve score so far and add on pivot row's degree. */
	    /* (we wait until here for this in case the pivot */
	    /* row's degree was reduced due to mass elimination). */
	    cur_score = Col [col].shared2.score + pivot_row_degree ;

	    /* calculate the max possible score as the number of */
	    /* external columns minus the 'k' value minus the */
	    /* columns thickness */
	    max_score = n_col - k - Col [col].shared1.thickness ;

	    /* make the score the external degree of the union-of-rows */
	    cur_score -= Col [col].shared1.thickness ;

	    /* make sure score is less or equal than the max score */
	    cur_score = MIN (cur_score, max_score) ;
	    ASSERT (cur_score >= 0) ;

	    /* store updated score */
	    Col [col].shared2.score = cur_score ;

	    /* === Place column back in degree list ========================= */

	    ASSERT (min_score >= 0) ;
	    ASSERT (min_score <= n_col) ;
	    ASSERT (cur_score >= 0) ;
	    ASSERT (cur_score <= n_col) ;
	    ASSERT (head [cur_score] >= EMPTY) ;
	    next_col = head [cur_score] ;
	    Col [col].shared4.degree_next = next_col ;
	    Col [col].shared3.prev = EMPTY ;
	    if (next_col != EMPTY)
	    {
		Col [next_col].shared3.prev = col ;
	    }
	    head [cur_score] = col ;

	    /* see if this score is less than current min */
	    min_score = MIN (min_score, cur_score) ;

	}

#ifndef NDEBUG
	debug_deg_lists (n_row, n_col, Row, Col, head,
		min_score, n_col2-k, max_deg) ;
#endif /* NDEBUG */

	/* === Resurrect the new pivot row ================================== */

	if (pivot_row_degree > 0)
	{
	    /* update pivot row length to reflect any cols that were killed */
	    /* during super-col detection and mass elimination */
	    Row [pivot_row].start  = pivot_row_start ;
	    Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ;
	    Row [pivot_row].shared1.degree = pivot_row_degree ;
	    Row [pivot_row].shared2.mark = 0 ;
	    /* pivot row is no longer dead */
	}
    }

    /* === All principal columns have now been ordered ====================== */

    return (ngarbage) ;
}


/* ========================================================================== */
/* === order_children ======================================================= */
/* ========================================================================== */

/*
    The find_ordering routine has ordered all
of the principal columns (the representatives of the supercolumns). The non-principal columns have not yet been ordered. This routine orders those columns by walking up the parent tree (a column is a child of the column which absorbed it). The final permutation vector is then placed in p [0 ... n_col-1], with p [0] being the first column, and p [n_col-1] being the last. It doesn't look like it at first glance, but be assured that this routine takes time linear in the number of columns. Although not immediately obvious, the time taken by this routine is O (n_col), that is, linear in the number of columns. Not user-callable. */ PRIVATE void order_children ( /* === Parameters ======================================================= */ int n_col, /* number of columns of A */ Colamd_Col Col [], /* of size n_col+1 */ int p [] /* p [0 ... n_col-1] is the column permutation*/ ) { /* === Local variables ================================================== */ int i ; /* loop counter for all columns */ int c ; /* column index */ int parent ; /* index of column's parent */ int order ; /* column's order */ /* === Order each non-principal column ================================== */ for (i = 0 ; i < n_col ; i++) { /* find an un-ordered non-principal column */ ASSERT (COL_IS_DEAD (i)) ; if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY) { parent = i ; /* once found, find its principal parent */ do { parent = Col [parent].shared1.parent ; } while (!COL_IS_DEAD_PRINCIPAL (parent)) ; /* now, order all un-ordered non-principal columns along path */ /* to this parent. collapse tree at the same time */ c = i ; /* get order of parent */ order = Col [parent].shared2.order ; do { ASSERT (Col [c].shared2.order == EMPTY) ; /* order this column */ Col [c].shared2.order = order++ ; /* collaps tree */ Col [c].shared1.parent = parent ; /* get immediate parent of this column */ c = Col [c].shared1.parent ; /* continue until we hit an ordered column. 
There are */ /* guarranteed not to be anymore unordered columns */ /* above an ordered column */ } while (Col [c].shared2.order == EMPTY) ; /* re-order the super_col parent to largest order for this group */ Col [parent].shared2.order = order ; } } /* === Generate the permutation ========================================= */ for (c = 0 ; c < n_col ; c++) { p [Col [c].shared2.order] = c ; } } /* ========================================================================== */ /* === detect_super_cols ==================================================== */ /* ========================================================================== */ /* Detects supercolumns by finding matches between columns in the hash buckets. Check amongst columns in the set A [row_start ... row_start + row_length-1]. The columns under consideration are currently *not* in the degree lists, and have already been placed in the hash buckets. The hash bucket for columns whose hash function is equal to h is stored as follows: if head [h] is >= 0, then head [h] contains a degree list, so: head [h] is the first column in degree bucket h. Col [head [h]].headhash gives the first column in hash bucket h. otherwise, the degree list is empty, and: -(head [h] + 2) is the first column in hash bucket h. For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous column" pointer. Col [c].shared3.hash is used instead as the hash number for that column. The value of Col [c].shared4.hash_next is the next column in the same hash bucket. Assuming no, or "few" hash collisions, the time taken by this routine is linear in the sum of the sizes (lengths) of each column whose score has just been computed in the approximate degree computation. Not user-callable. 
*/

PRIVATE void detect_super_cols
(
    /* === Parameters ======================================================= */

#ifndef NDEBUG
    /* these two parameters are only needed when debugging is enabled: */
    int n_col,			/* number of columns of A */
    Colamd_Row Row [],		/* of size n_row+1 */
#endif /* NDEBUG */

    Colamd_Col Col [],		/* of size n_col+1 */
    int A [],			/* row indices of A */
    int head [],		/* head of degree lists and hash buckets */
    int row_start,		/* pointer to set of columns to check */
    int row_length		/* number of columns to check */
)
{
    /* === Local variables ================================================== */

    int pos ;			/* position inside the row being scanned */
    int col ;			/* column found at that position */
    int bucket ;		/* hash value of col */
    int bucket_head ;		/* head [bucket] as read for this column */
    int keeper ;		/* column that absorbs its duplicates */
    int cand ;			/* column compared against keeper */
    int before ;		/* column preceding cand in the bucket */
    int len ;			/* length of column keeper */
    int i ;			/* row position while comparing */
    int identical ;		/* nonzero while cand still matches keeper */
    const int *keeper_rows ;	/* row indices of column keeper */
    const int *cand_rows ;	/* row indices of column cand */

    /* === Consider each column in the row ================================== */

    for (pos = 0 ; pos < row_length ; pos++)
    {
	col = A [row_start + pos] ;
	if (COL_IS_DEAD (col))
	{
	    continue ;
	}

	/* hash number saved for this column */
	bucket = Col [col].shared3.hash ;
	ASSERT (bucket <= n_col) ;

	/* === Locate the first column of the hash bucket =================== */

	bucket_head = head [bucket] ;
	keeper = (bucket_head > EMPTY)
	    ? Col [bucket_head].shared3.headhash
	    : - (bucket_head + 2) ;

	/* === Walk the bucket ============================================== */

	while (keeper != EMPTY)
	{
	    ASSERT (COL_IS_ALIVE (keeper)) ;
	    ASSERT (Col [keeper].shared3.hash == bucket) ;
	    len = Col [keeper].length ;

	    /* before trails cand so that cand can be unlinked */
	    before = keeper ;

	    /* === Compare keeper with every column after it ================ */

	    cand = Col [keeper].shared4.hash_next ;
	    while (cand != EMPTY)
	    {
		ASSERT (cand != keeper) ;
		ASSERT (COL_IS_ALIVE (cand)) ;
		ASSERT (Col [cand].shared3.hash == bucket) ;

		/* cheap test first: lengths and scores must agree */
		identical = (Col [cand].length == len
		    && Col [cand].shared2.score == Col [keeper].shared2.score) ;

		if (identical)
		{
		    /* full test: same row index at every position */
		    keeper_rows = &A [Col [keeper].start] ;
		    cand_rows = &A [Col [cand].start] ;
		    for (i = 0 ; i < len ; i++)
		    {
			/* the columns are "clean" (no dead rows) */
			ASSERT (ROW_IS_ALIVE (keeper_rows [i])) ;
			ASSERT (ROW_IS_ALIVE (cand_rows [i])) ;
			if (keeper_rows [i] != cand_rows [i])
			{
			    identical = 0 ;
			    break ;
			}
		    }
		}

		if (identical)
		{
		    /* === The two columns are identical ==================== */

		    ASSERT (Col [cand].shared2.score
			== Col [keeper].shared2.score) ;

		    /* thickness must be read before parent is written */
		    Col [keeper].shared1.thickness +=
			Col [cand].shared1.thickness ;
		    Col [cand].shared1.parent = keeper ;
		    KILL_NON_PRINCIPAL_COL (cand) ;
		    /* cand is ordered later, in order_children() */
		    Col [cand].shared2.order = EMPTY ;
		    /* unlink cand from the hash bucket */
		    Col [before].shared4.hash_next =
			Col [cand].shared4.hash_next ;
		}
		else
		{
		    /* cand stays in the bucket */
		    before = cand ;
		}

		cand = Col [cand].shared4.hash_next ;
	    }

	    keeper = Col [keeper].shared4.hash_next ;
	}

	/* === Empty this hash bucket ======================================= */

	if (bucket_head > EMPTY)
	{
	    /* corresponding degree list "hash" is not empty */
	    Col [bucket_head].shared3.headhash = EMPTY ;
	}
	else
	{
	    /* corresponding degree list "hash" is empty */
	    head [bucket] = EMPTY ;
	}
    }
}


/* ========================================================================== */
/* === garbage_collection =================================================== */
/* ========================================================================== */

/*
    Defragments and compacts columns and rows in the workspace A.
Used when all available memory has been used while performing row merging. Returns the index of the first free position in A, after garbage collection. The time taken by this routine is linear is the size of the array A, which is itself linear in the number of nonzeros in the input matrix. Not user-callable. */ PRIVATE int garbage_collection /* returns the new value of pfree */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows */ int n_col, /* number of columns */ Colamd_Row Row [], /* row info */ Colamd_Col Col [], /* column info */ int A [], /* A [0 ... Alen-1] holds the matrix */ int *pfree /* &A [0] ... pfree is in use */ ) { /* === Local variables ================================================== */ int *psrc ; /* source pointer */ int *pdest ; /* destination pointer */ int j ; /* counter */ int r ; /* a row index */ int c ; /* a column index */ int length ; /* length of a row or column */ #ifndef NDEBUG int debug_rows ; DEBUG2 (("Defrag..\n")) ; for (psrc = &A[0] ; psrc < pfree ; psrc++) ASSERT (*psrc >= 0) ; debug_rows = 0 ; #endif /* NDEBUG */ /* === Defragment the columns =========================================== */ pdest = &A[0] ; for (c = 0 ; c < n_col ; c++) { if (COL_IS_ALIVE (c)) { psrc = &A [Col [c].start] ; /* move and compact the column */ ASSERT (pdest <= psrc) ; Col [c].start = (int) (pdest - &A [0]) ; length = Col [c].length ; for (j = 0 ; j < length ; j++) { r = *psrc++ ; if (ROW_IS_ALIVE (r)) { *pdest++ = r ; } } Col [c].length = (int) (pdest - &A [Col [c].start]) ; } } /* === Prepare to defragment the rows =================================== */ for (r = 0 ; r < n_row ; r++) { if (ROW_IS_ALIVE (r)) { if (Row [r].length == 0) { /* this row is of zero length. 
cannot compact it, so kill it */ DEBUG3 (("Defrag row kill\n")) ; KILL_ROW (r) ; } else { /* save first column index in Row [r].shared2.first_column */ psrc = &A [Row [r].start] ; Row [r].shared2.first_column = *psrc ; ASSERT (ROW_IS_ALIVE (r)) ; /* flag the start of the row with the one's complement of row */ *psrc = ONES_COMPLEMENT (r) ; #ifndef NDEBUG debug_rows++ ; #endif /* NDEBUG */ } } } /* === Defragment the rows ============================================== */ psrc = pdest ; while (psrc < pfree) { /* find a negative number ... the start of a row */ if (*psrc++ < 0) { psrc-- ; /* get the row index */ r = ONES_COMPLEMENT (*psrc) ; ASSERT (r >= 0 && r < n_row) ; /* restore first column index */ *psrc = Row [r].shared2.first_column ; ASSERT (ROW_IS_ALIVE (r)) ; /* move and compact the row */ ASSERT (pdest <= psrc) ; Row [r].start = (int) (pdest - &A [0]) ; length = Row [r].length ; for (j = 0 ; j < length ; j++) { c = *psrc++ ; if (COL_IS_ALIVE (c)) { *pdest++ = c ; } } Row [r].length = (int) (pdest - &A [Row [r].start]) ; #ifndef NDEBUG debug_rows-- ; #endif /* NDEBUG */ } } /* ensure we found all the rows */ ASSERT (debug_rows == 0) ; /* === Return the new value of pfree ==================================== */ return ((int) (pdest - &A [0])) ; } /* ========================================================================== */ /* === clear_mark =========================================================== */ /* ========================================================================== */ /* Clears the Row [].shared2.mark array, and returns the new tag_mark. Return value is the new tag_mark. Not user-callable. */ PRIVATE int clear_mark /* return the new value for tag_mark */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows in A */ Colamd_Row Row [] /* Row [0 ... 
n_row-1].shared2.mark is set to zero */ ) { /* === Local variables ================================================== */ int r ; for (r = 0 ; r < n_row ; r++) { if (ROW_IS_ALIVE (r)) { Row [r].shared2.mark = 0 ; } } return (1) ; } /* ========================================================================== */ /* === print_report ========================================================= */ /* ========================================================================== */ PRIVATE void print_report ( char *method, int stats [COLAMD_STATS] ) { int i1, i2, i3 ; if (!stats) { PRINTF ("%s: No statistics available.\n", method) ; return ; } i1 = stats [COLAMD_INFO1] ; i2 = stats [COLAMD_INFO2] ; i3 = stats [COLAMD_INFO3] ; if (stats [COLAMD_STATUS] >= 0) { PRINTF ("%s: OK. ", method) ; } else { PRINTF ("%s: ERROR. ", method) ; } switch (stats [COLAMD_STATUS]) { case COLAMD_OK_BUT_JUMBLED: PRINTF ("Matrix has unsorted or duplicate row indices.\n") ; PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n", method, i3) ; PRINTF ("%s: last seen duplicate or out-of-order row index: %d\n", method, INDEX (i2)) ; PRINTF ("%s: last seen in column: %d", method, INDEX (i1)) ; /* no break - fall through to next case instead */ case COLAMD_OK: PRINTF ("\n") ; PRINTF ("%s: number of dense or empty rows ignored: %d\n", method, stats [COLAMD_DENSE_ROW]) ; PRINTF ("%s: number of dense or empty columns ignored: %d\n", method, stats [COLAMD_DENSE_COL]) ; PRINTF ("%s: number of garbage collections performed: %d\n", method, stats [COLAMD_DEFRAG_COUNT]) ; break ; case COLAMD_ERROR_A_not_present: PRINTF ("Array A (row indices of matrix) not present.\n") ; break ; case COLAMD_ERROR_p_not_present: PRINTF ("Array p (column pointers for matrix) not present.\n") ; break ; case COLAMD_ERROR_nrow_negative: PRINTF ("Invalid number of rows (%d).\n", i1) ; break ; case COLAMD_ERROR_ncol_negative: PRINTF ("Invalid number of columns (%d).\n", i1) ; break ; case COLAMD_ERROR_nnz_negative: PRINTF 
("Invalid number of nonzero entries (%d).\n", i1) ; break ; case COLAMD_ERROR_p0_nonzero: PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ; break ; case COLAMD_ERROR_A_too_small: PRINTF ("Array A too small.\n") ; PRINTF (" Need Alen >= %d, but given only Alen = %d.\n", i1, i2) ; break ; case COLAMD_ERROR_col_length_negative: PRINTF ("Column %d has a negative number of nonzero entries (%d).\n", INDEX (i1), i2) ; break ; case COLAMD_ERROR_row_index_out_of_bounds: PRINTF ("Row index (row %d) out of bounds (%d to %d) in column %d.\n", INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ; break ; case COLAMD_ERROR_out_of_memory: PRINTF ("Out of memory.\n") ; break ; case COLAMD_ERROR_internal_error: /* if this happens, there is a bug in the code */ PRINTF ("Internal error! Please contact authors (davis@cise.ufl.edu).\n") ; break ; } } /* ========================================================================== */ /* === colamd debugging routines ============================================ */ /* ========================================================================== */ /* When debugging is disabled, the remainder of this file is ignored. */ #ifndef NDEBUG /* ========================================================================== */ /* === debug_structures ===================================================== */ /* ========================================================================== */ /* At this point, all empty rows and columns are dead. All live columns are "clean" (containing no dead rows) and simplicial (no supercolumns yet). Rows may contain dead columns, but all live rows contain at least one live column. 
*/ PRIVATE void debug_structures ( /* === Parameters ======================================================= */ int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int A [], int n_col2 ) { /* === Local variables ================================================== */ int i ; int c ; int *cp ; int *cp_end ; int len ; int score ; int r ; int *rp ; int *rp_end ; int deg ; /* === Check A, Row, and Col ============================================ */ for (c = 0 ; c < n_col ; c++) { if (COL_IS_ALIVE (c)) { len = Col [c].length ; score = Col [c].shared2.score ; DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ; ASSERT (len > 0) ; ASSERT (score >= 0) ; ASSERT (Col [c].shared1.thickness == 1) ; cp = &A [Col [c].start] ; cp_end = cp + len ; while (cp < cp_end) { r = *cp++ ; ASSERT (ROW_IS_ALIVE (r)) ; } } else { i = Col [c].shared2.order ; ASSERT (i >= n_col2 && i < n_col) ; } } for (r = 0 ; r < n_row ; r++) { if (ROW_IS_ALIVE (r)) { i = 0 ; len = Row [r].length ; deg = Row [r].shared1.degree ; ASSERT (len > 0) ; ASSERT (deg > 0) ; rp = &A [Row [r].start] ; rp_end = rp + len ; while (rp < rp_end) { c = *rp++ ; if (COL_IS_ALIVE (c)) { i++ ; } } ASSERT (i > 0) ; } } } /* ========================================================================== */ /* === debug_deg_lists ====================================================== */ /* ========================================================================== */ /* Prints the contents of the degree lists. Counts the number of columns in the degree list and compares it to the total it should have. Also checks the row degrees. 
*/ PRIVATE void debug_deg_lists ( /* === Parameters ======================================================= */ int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int head [], int min_score, int should, int max_deg ) { /* === Local variables ================================================== */ int deg ; int col ; int have ; int row ; /* === Check the degree lists =========================================== */ if (n_col > 10000 && colamd_debug <= 0) { return ; } have = 0 ; DEBUG4 (("Degree lists: %d\n", min_score)) ; for (deg = 0 ; deg <= n_col ; deg++) { col = head [deg] ; if (col == EMPTY) { continue ; } DEBUG4 (("%d:", deg)) ; while (col != EMPTY) { DEBUG4 ((" %d", col)) ; have += Col [col].shared1.thickness ; ASSERT (COL_IS_ALIVE (col)) ; col = Col [col].shared4.degree_next ; } DEBUG4 (("\n")) ; } DEBUG4 (("should %d have %d\n", should, have)) ; ASSERT (should == have) ; /* === Check the row degrees ============================================ */ if (n_row > 10000 && colamd_debug <= 0) { return ; } for (row = 0 ; row < n_row ; row++) { if (ROW_IS_ALIVE (row)) { ASSERT (Row [row].shared1.degree <= max_deg) ; } } } /* ========================================================================== */ /* === debug_mark =========================================================== */ /* ========================================================================== */ /* Ensures that the tag_mark is less that the maximum and also ensures that each entry in the mark array is less than the tag mark. 
*/ PRIVATE void debug_mark ( /* === Parameters ======================================================= */ int n_row, Colamd_Row Row [], int tag_mark, int max_mark ) { /* === Local variables ================================================== */ int r ; /* === Check the Row marks ============================================== */ ASSERT (tag_mark > 0 && tag_mark <= max_mark) ; if (n_row > 10000 && colamd_debug <= 0) { return ; } for (r = 0 ; r < n_row ; r++) { ASSERT (Row [r].shared2.mark < tag_mark) ; } } /* ========================================================================== */ /* === debug_matrix ========================================================= */ /* ========================================================================== */ /* Prints out the contents of the columns and the rows. */ PRIVATE void debug_matrix ( /* === Parameters ======================================================= */ int n_row, int n_col, Colamd_Row Row [], Colamd_Col Col [], int A [] ) { /* === Local variables ================================================== */ int r ; int c ; int *rp ; int *rp_end ; int *cp ; int *cp_end ; /* === Dump the rows and columns of the matrix ========================== */ if (colamd_debug < 3) { return ; } DEBUG3 (("DUMP MATRIX:\n")) ; for (r = 0 ; r < n_row ; r++) { DEBUG3 (("Row %d alive? %d\n", r, ROW_IS_ALIVE (r))) ; if (ROW_IS_DEAD (r)) { continue ; } DEBUG3 (("start %d length %d degree %d\n", Row [r].start, Row [r].length, Row [r].shared1.degree)) ; rp = &A [Row [r].start] ; rp_end = rp + Row [r].length ; while (rp < rp_end) { c = *rp++ ; DEBUG4 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ; } } for (c = 0 ; c < n_col ; c++) { DEBUG3 (("Col %d alive? 
%d\n", c, COL_IS_ALIVE (c))) ; if (COL_IS_DEAD (c)) { continue ; } DEBUG3 (("start %d length %d shared1 %d shared2 %d\n", Col [c].start, Col [c].length, Col [c].shared1.thickness, Col [c].shared2.score)) ; cp = &A [Col [c].start] ; cp_end = cp + Col [c].length ; while (cp < cp_end) { r = *cp++ ; DEBUG4 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ; } } } PRIVATE void colamd_get_debug ( char *method ) { colamd_debug = 0 ; /* no debug printing */ /* get "D" environment variable, which gives the debug printing level */ if (getenv ("D")) { colamd_debug = atoi (getenv ("D")) ; } DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n", method, colamd_debug)) ; } #endif /* NDEBUG */ SuperLU_DIST_5.3.0/SRC/colamd.h0000644013363400111340000002256413233431301014704 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file colamd.h \brief Colamd prototypes and definitions
 
    ==========================================================================
    === colamd/symamd prototypes and definitions =============================
    ==========================================================================

    You must include this file (colamd.h) in any routine that uses colamd,
    symamd, or the related macros and definitions.

    Authors:

	The authors of the code itself are Stefan I. Larimore and Timothy A.
	Davis (davis@cise.ufl.edu), University of Florida.  The algorithm was
	developed in collaboration with John Gilbert, Xerox PARC, and Esmond
	Ng, Oak Ridge National Laboratory.

    Date:

	September 8, 2003.  Version 2.3.

    Acknowledgements:

	This work was supported by the National Science Foundation, under
	grants DMS-9504974 and DMS-9803599.

    Notice:

	Copyright (c) 1998-2003 by the University of Florida.
	All Rights Reserved.

	THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
	EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.

	Permission is hereby granted to use, copy, modify, and/or distribute
	this program, provided that the Copyright, this License, and the
	Availability of the original version is retained on all copies and made
	accessible to the end-user of any code or package that includes COLAMD
	or any modified version of COLAMD. 

    Availability:

	The colamd/symamd library is available at

	    http://www.cise.ufl.edu/research/sparse/colamd/

	This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
	file.  It is required by the colamd.c, colamdmex.c, and symamdmex.c
	files, and by any C code that calls the routines whose prototypes are
	listed below, or that uses the colamd/symamd definitions listed below.
 
*/ #ifndef COLAMD_H #define COLAMD_H /* ========================================================================== */ /* === Include files ======================================================== */ /* ========================================================================== */ #include /* ========================================================================== */ /* === Knob and statistics definitions ====================================== */ /* ========================================================================== */ /* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ #define COLAMD_KNOBS 20 /* number of output statistics. Only stats [0..6] are currently used. */ #define COLAMD_STATS 20 /* knobs [0] and stats [0]: dense row knob and output statistic. */ #define COLAMD_DENSE_ROW 0 /* knobs [1] and stats [1]: dense column knob and output statistic. */ #define COLAMD_DENSE_COL 1 /* stats [2]: memory defragmentation count output statistic */ #define COLAMD_DEFRAG_COUNT 2 /* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */ #define COLAMD_STATUS 3 /* stats [4..6]: error info, or info on jumbled columns */ #define COLAMD_INFO1 4 #define COLAMD_INFO2 5 #define COLAMD_INFO3 6 /* error codes returned in stats [3]: */ #define COLAMD_OK (0) #define COLAMD_OK_BUT_JUMBLED (1) #define COLAMD_ERROR_A_not_present (-1) #define COLAMD_ERROR_p_not_present (-2) #define COLAMD_ERROR_nrow_negative (-3) #define COLAMD_ERROR_ncol_negative (-4) #define COLAMD_ERROR_nnz_negative (-5) #define COLAMD_ERROR_p0_nonzero (-6) #define COLAMD_ERROR_A_too_small (-7) #define COLAMD_ERROR_col_length_negative (-8) #define COLAMD_ERROR_row_index_out_of_bounds (-9) #define COLAMD_ERROR_out_of_memory (-10) #define COLAMD_ERROR_internal_error (-999) /* ========================================================================== */ /* === Row and Column structures ============================================ */ /* 
========================================================================== */ /* User code that makes use of the colamd/symamd routines need not directly */ /* reference these structures. They are used only for the COLAMD_RECOMMENDED */ /* macro. */ typedef struct Colamd_Col_struct { int start ; /* index for A of first row in this column, or DEAD */ /* if column is dead */ int length ; /* number of rows in this column */ union { int thickness ; /* number of original columns represented by this */ /* col, if the column is alive */ int parent ; /* parent in parent tree super-column structure, if */ /* the column is dead */ } shared1 ; union { int score ; /* the score used to maintain heap, if col is alive */ int order ; /* pivot ordering of this column, if col is dead */ } shared2 ; union { int headhash ; /* head of a hash bucket, if col is at the head of */ /* a degree list */ int hash ; /* hash value, if col is not in a degree list */ int prev ; /* previous column in degree list, if col is in a */ /* degree list (but not at the head of a degree list) */ } shared3 ; union { int degree_next ; /* next column, if col is in a degree list */ int hash_next ; /* next column, if col is in a hash list */ } shared4 ; } Colamd_Col ; typedef struct Colamd_Row_struct { int start ; /* index for A of first col in this row */ int length ; /* number of principal columns in this row */ union { int degree ; /* number of principal & non-principal columns in row */ int p ; /* used as a row pointer in init_rows_cols () */ } shared1 ; union { int mark ; /* for computing set differences and marking dead rows*/ int first_column ;/* first column in row (used in garbage collection) */ } shared2 ; } Colamd_Row ; /* ========================================================================== */ /* === Colamd recommended memory size ======================================= */ /* ========================================================================== */ /* The recommended length Alen of the array 
A passed to colamd is given by the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any argument is negative. 2*nnz space is required for the row and column indices of the matrix. COLAMD_C (n_col) + COLAMD_R (n_row) space is required for the Col and Row arrays, respectively, which are internal to colamd. An additional n_col space is the minimal amount of "elbow room", and nnz/5 more space is recommended for run time efficiency. This macro is not needed when using symamd. Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid gcc -pedantic warning messages. */ #define COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (Colamd_Col) / sizeof (int))) #define COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int))) #define COLAMD_RECOMMENDED(nnz, n_row, n_col) \ ( \ ((nnz) < 0 || (n_row) < 0 || (n_col) < 0) \ ? \ (-1) \ : \ (2 * (nnz) + COLAMD_C (n_col) + COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \ ) /* ========================================================================== */ /* === Prototypes of user-callable routines ================================= */ /* ========================================================================== */ int colamd_recommended /* returns recommended value of Alen, */ /* or (-1) if input arguments are erroneous */ ( int nnz, /* nonzeros in A */ int n_row, /* number of rows in A */ int n_col /* number of columns in A */ ) ; void colamd_set_defaults /* sets default parameters */ ( /* knobs argument is modified on output */ double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ ) ; int colamd /* returns (1) if successful, (0) otherwise*/ ( /* A and p arguments are modified on output */ int n_row, /* number of rows in A */ int n_col, /* number of columns in A */ int Alen, /* size of the array A */ int A [], /* row indices of A, of size Alen */ int p [], /* column pointers of A, of size n_col+1 */ double knobs [COLAMD_KNOBS],/* parameter settings for colamd */ int stats 
[COLAMD_STATS] /* colamd output statistics and error codes */ ) ; int symamd /* return (1) if OK, (0) otherwise */ ( int n, /* number of rows and columns of A */ int A [], /* row indices of A */ int p [], /* column pointers of A */ int perm [], /* output permutation, size n_col+1 */ double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ int stats [COLAMD_STATS], /* output statistics and error codes */ void * (*allocate) (size_t, size_t), /* pointer to calloc (ANSI C) or */ /* mxCalloc (for MATLAB mexFunction) */ void (*release) (void *) /* pointer to free (ANSI C) or */ /* mxFree (for MATLAB mexFunction) */ ) ; void colamd_report ( int stats [COLAMD_STATS] ) ; void symamd_report ( int stats [COLAMD_STATS] ) ; #endif /* COLAMD_H */ SuperLU_DIST_5.3.0/SRC/dsp_blas3_dist.c0000644013363400111340000001100413233431301016320 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Sparse BLAS3, using some dense BLAS3 operations * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ /* * File name: sp_blas3.c * Purpose: Sparse BLAS3, using some dense BLAS3 operations. */ #include "superlu_ddefs.h" /*! \brief
  Purpose   
    =======   

    sp_dgemm_dist performs one of the matrix-matrix operations   

       C := alpha*op( A )*op( B ) + beta*C,   

    where  op( X ) is one of 

       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),

    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
  

    Parameters   
    ==========   

    TRANSA - (input) char*
             On entry, TRANSA specifies the form of op( A ) to be used in 
             the matrix multiplication as follows:   
                TRANSA = 'N' or 'n',  op( A ) = A.   
                TRANSA = 'T' or 't',  op( A ) = A'.   
                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
             Unchanged on exit.   

    TRANSB - (input) char*
             On entry, TRANSB specifies the form of op( B ) to be used in 
             the matrix multiplication as follows:   
                TRANSB = 'N' or 'n',  op( B ) = B.   
                TRANSB = 'T' or 't',  op( B ) = B'.   
                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
             Unchanged on exit.   

    M      - (input) int   
             On entry,  M  specifies  the number of rows of the matrix 
	     op( A ) and of the matrix C.  M must be at least zero. 
	     Unchanged on exit.   

    N      - (input) int
             On entry,  N specifies the number of columns of the matrix 
	     op( B ) and the number of columns of the matrix C. N must be 
	     at least zero.
	     Unchanged on exit.   

    K      - (input) int
             On entry, K specifies the number of columns of the matrix 
	     op( A ) and the number of rows of the matrix op( B ). K must 
	     be at least  zero.   
             Unchanged on exit.
	     
    ALPHA  - (input) double
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
             Currently, the type of A can be:
                 Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. 
             In the future, more general A can be handled.

    B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 
             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
             part of the array B must contain the matrix B, otherwise 
             the leading n by k part of the array B must contain the 
             matrix B.   
             Unchanged on exit.   

    LDB    - (input) int
             On entry, LDB specifies the first dimension of B as declared 
             in the calling (sub) program. LDB must be at least max( 1, n ).  
             Unchanged on exit.   

    BETA   - (input) double
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then C need not be set on input.   

    C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
             Before entry, the leading m by n part of the array C must 
             contain the matrix C,  except when beta is zero, in which 
             case C need not be set on entry.   
             On exit, the array C is overwritten by the m by n matrix 
	     ( alpha*op( A )*B + beta*C ).   

    LDC    - (input) int
             On entry, LDC specifies the first dimension of C as declared 
             in the calling (sub)program. LDC must be at least max(1,m).   
             Unchanged on exit.   

    ==== Sparse Level 3 Blas routine.   
*/
int sp_dgemm_dist(char *transa, int n, double alpha, SuperMatrix *A,
                  double *b, int ldb, double beta, double *c, int ldc)
{
    /*
     * Apply the sparse matrix-vector kernel sp_dgemv_dist() once per
     * column: column `col` of b (starting at b[ldb*col]) is the input
     * vector and column `col` of c (starting at c[ldc*col]) is the
     * in/out vector.  Both are walked with unit stride.  Always returns 0.
     */
    const int unit_stride = 1;
    int col = 0;

    while (col < n) {
        sp_dgemv_dist(transa, alpha, A, b + ldb*col, unit_stride,
                      beta, c + ldc*col, unit_stride);
        ++col;
    }

    return 0;
}
SuperLU_DIST_5.3.0/SRC/comm.c0000644013363400111340000000740413233431301014367 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Broadcast an array of *dtype* numbers * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *   Broadcast an array of *dtype* numbers. The communication pattern
 *   is a tree with number of branches equal to NBRANCHES.
 *   The process ranks are between 0 and Np-1.
 * 
 *   The following two pairs of graphs give different ways of viewing the same
 *   algorithm.  The first pair shows the trees as they should be visualized
 *   when examining the algorithm.  The second pair are isomorphic graphs of
 *   of the first, which show the actual pattern of data movement.
 *   Note that a tree broadcast with NBRANCHES = 2 is isomorphic with a
 *   hypercube broadcast (however, it does not require the nodes be a
 *   power of two to work).
 *
 *    TREE BROADCAST, NBRANCHES = 2     *    TREE BROADCAST, NBRANCHES = 3
 *       
 *     root=2
 * i=4   &______________                *
 *       |              \               *       root=2
 * i=2   &______         &______        * i=3     &______________________
 *       |      \        |      \       *         |          \           \
 * i=1   &__     &__     &__     &__    * i=1     &______     &______     &__
 *       |  \    |  \    |  \    |  \   *         |  \   \    |  \   \    |  \
 *       2   3   4   5   6   7   0   1  *         2   3   4   5   6   7   0   1
 *
 *
 *          ISOMORPHIC GRAPHS OF ABOVE, SHOWN IN MORE FAMILIAR TERMS:
 *
 *                2                                           2
 *       _________|_________                       ___________|____________
 *      /         |         \                     /           |      |     \
 *     6          4          3                   5            0      3      4
 *    / \         |                             / \           |
 *   0   7        5                            6   7          1
 *   |
 *   1
 *
 *
 * Arguments
 * =========
 * 
 * scope
 * 
*/ void bcast_tree(void *buf, int count, MPI_Datatype dtype, int root, int tag, gridinfo_t *grid, int scope, int *recvcnt) { int Iam, i, j, Np, nbranches = 2; int destdist; /* The distance of the destination node. */ int mydist; /* My distance from root. */ superlu_scope_t *scp; MPI_Status status; if ( scope == COMM_COLUMN ) scp = &grid->cscp; else if ( scope == ROW ) scp = &grid->rscp; Np = scp->Np; if ( Np < 2 ) return; Iam = scp->Iam; if ( Iam == root ) { for (i = nbranches; i < Np; i *= nbranches); for (i /= nbranches; i > 0; i /= nbranches) { for (j = 1; j < nbranches; ++j) { destdist = i*j; if ( destdist < Np ) MPI_Send( buf, count, dtype, (Iam+destdist)%Np, tag, scp->comm ); } } } else { mydist = (Np + Iam - root) % Np; for (i = nbranches; i < Np; i *= nbranches); for (i /= nbranches; (mydist%i); i /= nbranches); /* MPI_Probe( MPI_ANY_SOURCE, tag, scp->comm, &status );*/ MPI_Recv( buf, count, dtype, MPI_ANY_SOURCE, tag, scp->comm, &status ); MPI_Get_count( &status, dtype, recvcnt ); /* I need to send data to others. */ while ( (i > 1) && !(mydist%i) ) { i /= nbranches; for (j = 1; j < nbranches; ++j) { destdist = mydist + j*i; if ( destdist < Np ) MPI_Send( buf, *recvcnt, dtype, (root+destdist)%Np, tag, scp->comm ); } } } } /* BCAST_TREE */ SuperLU_DIST_5.3.0/SRC/zscatter.c0000644013363400111340000003614713233431301015301 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Scatter the computed blocks into LU destination. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 * Modified: 
 *   September 18, 2017, enable SIMD vectorized scatter operation.
 *   
 */
#include 
#include "superlu_zdefs.h"

static void
zscatter_l_1 (int ib,
           int ljb,
           int nsupc,
           int_t iukp,
           int_t* xsup,
           int klst,
           int nbrow,
           int_t lptr,
           int temp_nbrow,
           int * usub,
           int * lsub,
           doublecomplex *tempv,
           int * indirect_thread,
           int_t ** Lrowind_bc_ptr, doublecomplex **Lnzval_bc_ptr,
	   gridinfo_t * grid)
{
    /*
     * Subtract the update block held in tempv[] (leading dimension nbrow,
     * temp_nbrow rows used per column) from the destination L block whose
     * row-block number is ib inside local block column ljb.
     *
     * A column jj of the destination is updated only when
     * klst - usub[iukp + jj] is nonzero; tempv advances by nbrow only for
     * those columns, while the destination pointer advances by ldv for
     * every column.
     */
    int_t *index = Lrowind_bc_ptr[ljb];
    int_t ldv = index[1];       /* LDA of the dest lusup. */
    int_t luptrj = 0;           /* offset of the dest block in the nzval array */
    int_t lptrj;                /* offset of the dest block in index[] */
    int_t rel, i, jj;
    doublecomplex *nzval;

    /* Walk the block descriptors until block ib is found -- the blocks
       are not ordered, so this is a linear search. */
    for (lptrj = BC_HEADER; index[lptrj] != ib;
         lptrj += LB_DESCRIPTOR + index[lptrj + 1])
    {
        luptrj += index[lptrj + 1];
    }
    lptrj += LB_DESCRIPTOR;     /* step over the descriptor to the row indices */

    /*
     * Build the indirect table: the row indices inside an L block are not
     * sorted, so map (row - first index of block ib) -> position in the
     * destination block.
     */
    int_t fnz = FstBlockC (ib);
    for (i = 0; i < index[lptrj - 1]; ++i)
    {
        rel = index[lptrj + i] - fnz;
        indirect_thread[rel] = i;
    }

    nzval = Lnzval_bc_ptr[ljb] + luptrj;
    for (jj = 0; jj < nsupc; ++jj, nzval += ldv)
    {
        if (klst - usub[iukp + jj] == 0)
            continue;           /* empty segment: no tempv column consumed */

        for (i = 0; i < temp_nbrow; ++i)
        {
            rel = lsub[lptr + i] - fnz;
            z_sub(&nzval[indirect_thread[rel]], &nzval[indirect_thread[rel]],
                  &tempv[i]);
#ifdef PI_DEBUG
            /* NOTE(review): this debug print looks inherited from the real
               (double) version -- it applies '+' and %e to doublecomplex
               operands; confirm it builds before enabling PI_DEBUG. */
            double zz = 0.0;
            printf ("(%d %d, %0.3e, %0.3e, %3e ) ", ljb,
                    nzval - Lnzval_bc_ptr[ljb] + indirect_thread[rel],
                    nzval[indirect_thread[rel]] + tempv[i],
                    nzval[indirect_thread[rel]],tempv[i]);
#endif
        }
        tempv += nbrow;
    }
} /* zscatter_l_1 */

/*
 * Subtract the update block stored in tempv[] from the destination block
 * L(i,j) of local block column ljb.
 *
 * Two index tables are filled as a side effect:
 *   indirect_thread[r - fnz] = position of global row r in the dest. block
 *   indirect2[i]             = position in the dest. block of source row i
 * where fnz = FstBlockC(ib).
 *
 * Column jj of the destination is updated only when
 * klst - usub[iukp + jj] is nonzero; tempv advances by nbrow only for those
 * columns, whereas the destination advances by ldv for every column.
 */
static void
zscatter_l (
           int ib,    /* row block number of source block L(i,k) */
           int ljb,   /* local column block number of dest. block L(i,j) */
           int nsupc, /* number of columns in destination supernode */
           int_t iukp, /* point to destination supernode's index[] */
           int_t* xsup,
           int klst,
           int nbrow,  /* LDA of the block in tempv[] */
           int_t lptr, /* Input, point to index[] location of block L(i,k) */
	   int temp_nbrow, /* number of rows of source block L(i,k) */
           int_t* usub,
           int_t* lsub,
           doublecomplex *tempv,
           int* indirect_thread,int* indirect2,
           int_t ** Lrowind_bc_ptr, doublecomplex **Lnzval_bc_ptr,
           gridinfo_t * grid)
{
    
    int_t i, segsize, jj;
    doublecomplex *nzval;
    int_t *index = Lrowind_bc_ptr[ljb];
    int_t ldv = index[1];       /* LDA of the destination lusup. */
    int_t lptrj = BC_HEADER;
    int_t luptrj = 0;
    int_t ijb = index[lptrj];
    
    while (ijb != ib)  /* Search for destination block L(i,j) */
    {
        luptrj += index[lptrj + 1];
        lptrj += LB_DESCRIPTOR + index[lptrj + 1];
        ijb = index[lptrj];
    }
    
    /*
     * Build indirect table. This is needed because the indices are not sorted
     * in the L blocks.
     */
    int_t fnz = FstBlockC (ib);
    int_t dest_nbrow; 
    lptrj += LB_DESCRIPTOR;
    dest_nbrow=index[lptrj - 1];
    
    /*
     * The relative row index is declared inside each loop body: a scalar
     * declared outside an `omp simd` loop is shared between SIMD lanes, so
     * assigning it on every iteration would be a race.
     */
#if (_OPENMP>=201307)
#pragma omp simd
#endif
    for (i = 0; i < dest_nbrow; ++i) {
        const int_t rel = index[lptrj + i] - fnz;
        indirect_thread[rel] = i;
    }

    /* can be precalculated? */
#if (_OPENMP>=201307)
#pragma omp simd
#endif
    for (i = 0; i < temp_nbrow; ++i) { /* Source index is a subset of dest. */
        const int_t rel = lsub[lptr + i] - fnz;
        indirect2[i] = indirect_thread[rel];
    }

    nzval = Lnzval_bc_ptr[ljb] + luptrj; /* Destination block L(i,j) */
#ifdef __INTEL_COMPILER
#pragma ivdep
#endif
    for (jj = 0; jj < nsupc; ++jj) {
        segsize = klst - usub[iukp + jj];
        if (segsize) {
#if (_OPENMP>=201307)
#pragma omp simd
#endif
            for (i = 0; i < temp_nbrow; ++i) {
                z_sub(&nzval[indirect2[i]], &nzval[indirect2[i]], &tempv[i]);
            }
            tempv += nbrow;     /* next column of the source block */
        }
        nzval += ldv;           /* next column of the destination block */
    }
    
} /* zscatter_l */


/*
 * Subtract the update block stored in tempv[] from the destination block
 * U(ib, jb) held in local block row LBi(ib, grid).
 *
 * For each of the nsupc columns, fnz is read from the destination index
 * array; a column is updated only when klst - usub[iukp + jj] is nonzero.
 * tempv advances by nbrow only for updated columns, while the offset into
 * the destination values advances by (ilst - fnz) for every column.
 */
static void
zscatter_u (int ib,
           int jb,
           int nsupc,
           int_t iukp,
           int_t * xsup,
           int klst,
 	   int nbrow,      /* LDA of the block in tempv[] */
           int_t lptr,     /* point to index location of block L(i,k) */
	   int temp_nbrow, /* number of rows of source block L(i,k) */
           int_t* lsub,
           int_t* usub,
           doublecomplex* tempv,
           int_t ** Ufstnz_br_ptr, doublecomplex **Unzval_br_ptr,
           gridinfo_t * grid)
{
#ifdef PI_DEBUG
    printf ("A(%d,%d) goes to U block \n", ib, jb);
#endif

    int_t jj, i, fnz;
    int segsize;
    doublecomplex *ucol;
    int_t ilst = FstBlockC (ib + 1);
    int_t lib = LBi (ib, grid);
    int_t *index = Ufstnz_br_ptr[lib];

    /* Reinitialize the pointers to the beginning of the k-th column/row of
     * L/U factors.
     * usub[] - index array for panel U(k,:)
     */
    int_t iuip_lib, ruip_lib;
    iuip_lib = BR_HEADER;
    ruip_lib = 0;

    int_t ijb = index[iuip_lib];
    while (ijb < jb) {   /* Search for destination block. */
        ruip_lib += index[iuip_lib + 1];
        iuip_lib += UB_DESCRIPTOR + SuperSize (ijb);
        ijb = index[iuip_lib];
    }
    /* Skip descriptor. Now point to fstnz index of block U(i,j). */
    iuip_lib += UB_DESCRIPTOR;

    for (jj = 0; jj < nsupc; ++jj) {
        segsize = klst - usub[iukp + jj];
        fnz = index[iuip_lib++];
        if (segsize) {          /* Nonzero segment in U(k,j). */
            ucol = &Unzval_br_ptr[lib][ruip_lib];

            /*
             * rel is declared inside the loop body: a scalar declared
             * outside an `omp simd` loop is shared between SIMD lanes, so
             * assigning it on every iteration would be a race.
             */
#if (_OPENMP>=201307)
#pragma omp simd
#endif
            for (i = 0; i < temp_nbrow; ++i) {
                const int_t rel = lsub[lptr + i] - fnz;
                z_sub(&ucol[rel], &ucol[rel], &tempv[i]);

#ifdef PI_DEBUG
                /* NOTE(review): this debug print looks inherited from the
                   real (double) version -- it applies '+' and %e to
                   doublecomplex operands; confirm it builds before
                   enabling PI_DEBUG. */
                double zz = 0.0;
                if (!(*(long *) &zz == *(long *) &tempv[i]))
                    printf ("(%d, %0.3e, %0.3e ) ", rel, ucol[rel] + tempv[i],
                            ucol[rel]);
                //printing triplets (location??, old value, new value ) if none of them is zero
#endif
            } /* for i = 0:temp_nbrow */
            tempv += nbrow; /* Jump LDA to next column */
        }  /* if segsize */

        ruip_lib += ilst - fnz;

    }  /* for jj = 0:nsupc */
} /* zscatter_u */


/* Divide CPU-GPU dgemm work here. */
/* Debug-only file-scope tuning values, compiled in only when PI_DEBUG is
 * defined.
 * NOTE(review): gemm_division_cpu_gpu() and gemm_division_new() declare
 * locals named Ngem and min_gpu_col, which shadow these globals inside those
 * functions; confirm whether anything else still reads them.
 */
#ifdef PI_DEBUG
int Ngem = 2;
// int_t Ngem = 0;
int min_gpu_col = 6;
#else

    // int_t Ngem = 0;

#endif


#ifdef GPU_ACC

void
gemm_division_cpu_gpu(
    int* num_streams_used,  /*number of streams that will be used */
    int* stream_end_col,    /*array holding last column blk for each partition */
    int * ncpu_blks,        /*Number of CPU dgemm blks */
    /*input */
    int nbrow,              /*number of row in A matrix */
    int ldu,                /*number of k in dgemm */
    int nstreams, 
    int* full_u_cols,       /*array containing prefix sum of work load */
    int num_blks            /*Number of work load */
)
{
    int Ngem = sp_ienv(7);  /*get_mnk_dgemm ();*/
    int min_gpu_col = get_cublas_nb ();

    // Ngem = 1000000000;
    /*
       cpu is to gpu dgemm should be ideally 0:1 ratios to hide the total cost
       However since there is gpu latency of around 20,000 ns implying about
       200000 floating point calculation be done in that time so ~200,000/(2*nbrow*ldu)
       should be done in cpu to hide the latency; we Ngem =200,000/2 
     */
    int i, j;

    // {
    //     *num_streams_used=0;
    //     *ncpu_blks = num_blks;
    //     return;
    // }

    for (int i = 0; i < nstreams; ++i)
    {
        stream_end_col[i] = num_blks;
    }

    *ncpu_blks = 0;
    /*easy returns -1 when number of column are less than threshold */
    if (full_u_cols[num_blks - 1] < (Ngem / (nbrow * ldu)) || num_blks == 1 )
    {
        *num_streams_used = 0;
        *ncpu_blks = num_blks;
#ifdef PI_DEBUG
        printf ("full_u_cols[num_blks-1] %d  %d \n",
                full_u_cols[num_blks - 1], (Ngem / (nbrow * ldu)));
        printf ("Early return \n");
#endif
        return;

    }

    /* Easy return -2 when number of streams =0 */
    if (nstreams == 0)
    {
        *num_streams_used = 0;
        *ncpu_blks = num_blks;
        return;
        /* code */
    }
    /*find first block where count > Ngem */


    for (i = 0; i < num_blks - 1; ++i)  /*I can use binary search here */
    {
        if (full_u_cols[i + 1] > Ngem / (nbrow * ldu))
            break;
    }
    *ncpu_blks = i + 1;

    int_t cols_remain =
        full_u_cols[num_blks - 1] - full_u_cols[*ncpu_blks - 1];

#ifdef PI_DEBUG
    printf ("Remaining cols %d num_blks %d cpu_blks %d \n", cols_remain,
            num_blks, *ncpu_blks);
#endif
    if (cols_remain > 0)
    {
        *num_streams_used = 1;  /* now atleast one stream would be used */

#ifdef PI_DEBUG
        printf ("%d %d  %d %d \n", full_u_cols[num_blks - 1],
                full_u_cols[*ncpu_blks], *ncpu_blks, nstreams);
#endif
        int_t FP_MIN = 200000 / (nbrow * ldu);
        int_t cols_per_stream = SUPERLU_MAX (min_gpu_col, cols_remain / nstreams);
        cols_per_stream = SUPERLU_MAX (cols_per_stream, FP_MIN);
#ifdef PI_DEBUG
        printf ("cols_per_stream :\t%d\n", cols_per_stream);
#endif

        int_t cutoff = cols_per_stream + full_u_cols[*ncpu_blks - 1];
        for (int_t i = 0; i < nstreams; ++i)
        {
            stream_end_col[i] = num_blks;
        }
        j = *ncpu_blks;
        for (i = 0; i < nstreams - 1; ++i)
        {
            int_t st = (i == 0) ? (*ncpu_blks) : stream_end_col[i - 1];

            for (j = st; j < num_blks - 1; ++j)
            {
#ifdef PI_DEBUG
                printf ("i %d, j %d, %d  %d ", i, j, full_u_cols[j + 1],
                        cutoff);
#endif
                if (full_u_cols[j + 1] > cutoff)
                {
#ifdef PI_DEBUG
                    printf ("cutoff met \n");
#endif
                    cutoff = cols_per_stream + full_u_cols[j];
                    stream_end_col[i] = j + 1;
                    *num_streams_used += 1;
                    j++;
                    break;
                }
#ifdef PI_DEBUG
                printf ("\n");
#endif
            }

        }

    }
}

/*
 * Partition num_blks column blocks of a GEMM between the CPU and the GPU
 * streams, reading the work-load prefix sum from Ublock_info[].full_u_cols.
 *
 * The leading blocks, up to roughly Ngem / (nbrow * ldu) columns, are kept on
 * the CPU so that CPU work overlaps the GPU launch latency; the remaining
 * blocks are cut into at most nstreams contiguous ranges of about equal
 * column count, each at least max(min_gpu_col, 200000 / (nbrow * ldu))
 * columns wide.
 *
 * Output
 *   num_streams_used  number of streams that will be used (0 = CPU only).
 *                     Always written.
 *   stream_end_col    array of nstreams entries; entry i is one past the last
 *                     column block of partition i.  Unused entries are set to
 *                     num_blks.
 *   ncpu_blks         number of leading blocks handled by the CPU.
 *
 * Input
 *   nbrow        number of rows of the A matrix.
 *   ldu          inner dimension (k) of the GEMM.
 *   nstreams     number of available streams.
 *   Ublock_info  array of num_blks entries; full_u_cols holds the prefix sum
 *                of the work load.
 *   num_blks     number of work-load blocks.
 *
 * Degenerate inputs (no blocks, no streams, or an empty GEMM dimension)
 * assign everything to the CPU instead of indexing out of bounds or
 * dividing by zero.
 */
void
gemm_division_new (int * num_streams_used,   /*number of streams that will be used */
                   int * stream_end_col, /*array holding last column blk for each partition */
                   int * ncpu_blks,  /*Number of CPU dgemm blks */
                        /*input */
                   int nbrow,    /*number of row in A matrix */
                   int ldu,  /*number of k in dgemm */
                   int nstreams,
                   Ublock_info_t *Ublock_info,    /*array containing prefix sum of work load */
                   int num_blks  /*Number of work load */
    )
{
    int Ngem = sp_ienv(7); /*get_mnk_dgemm ();*/
    int min_gpu_col = get_cublas_nb ();
    int i, j;

    /* Defaults: every partition ends at the last block, nothing on the GPU.
       Writing *num_streams_used here guarantees it is defined on every
       return path. */
    for (i = 0; i < nstreams; ++i)
    {
        stream_end_col[i] = num_blks;
    }
    *num_streams_used = 0;
    *ncpu_blks = 0;

    /* Nothing to partition, or an empty GEMM: keep whatever exists on the
       CPU.  This also protects the index num_blks - 1 and the divisions by
       nbrow * ldu below. */
    if (num_blks <= 0 || nbrow <= 0 || ldu <= 0)
    {
        *ncpu_blks = (num_blks > 0) ? num_blks : 0;
        return;
    }

    /* Work per column, widened so that the product cannot overflow int. */
    const long long work_per_col = (long long) nbrow * (long long) ldu;
    /* Number of columns the CPU should take to hide the GPU latency. */
    const int cpu_col_threshold = (int) (Ngem / work_per_col);

    /* Easy return 1: too few columns, or a single block. */
    if (num_blks == 1
        || Ublock_info[num_blks - 1].full_u_cols < cpu_col_threshold)
    {
        *ncpu_blks = num_blks;
        return;
    }

    /* Easy return 2: no streams available. */
    if (nstreams <= 0)
    {
        *ncpu_blks = num_blks;
        return;
    }

    /* Find the first block whose prefix sum exceeds the CPU threshold; all
       blocks before it stay on the CPU.  (A binary search would also do.) */
    for (i = 0; i < num_blks - 1; ++i)
    {
        if (Ublock_info[i + 1].full_u_cols > cpu_col_threshold)
            break;
    }
    *ncpu_blks = i + 1;

    int_t cols_remain =
       Ublock_info [num_blks - 1].full_u_cols - Ublock_info[*ncpu_blks - 1].full_u_cols;

    if (cols_remain <= 0)
    {
        return;                 /* CPU took everything; zero streams used */
    }

    *num_streams_used = 1;      /* now at least one stream would be used */

    /* Minimum columns per stream needed to amortize the GPU latency
       (about 200000 floating point operations). */
    int_t FP_MIN = (int_t) (200000 / work_per_col);
    int_t cols_per_stream = SUPERLU_MAX (min_gpu_col, cols_remain / nstreams);
    cols_per_stream = SUPERLU_MAX (cols_per_stream, FP_MIN);

    /* Prefix-sum value at which the current stream is considered full. */
    int_t cutoff = cols_per_stream + Ublock_info[*ncpu_blks - 1].full_u_cols;

    /* Close streams 0 .. nstreams-2 one after another; the last stream in
       use keeps the default end, num_blks. */
    for (i = 0; i < nstreams - 1; ++i)
    {
        int st = (i == 0) ? (*ncpu_blks) : stream_end_col[i - 1];

        for (j = st; j < num_blks - 1; ++j)
        {
            if (Ublock_info[j + 1].full_u_cols > cutoff)
            {
                cutoff = cols_per_stream + Ublock_info[j].full_u_cols;
                stream_end_col[i] = j + 1;
                *num_streams_used += 1;
                break;
            }
        }
    }
}

#endif  /* defined GPU_ACC */
SuperLU_DIST_5.3.0/SRC/superlu_defs.h0000644013363400111340000007162613233431301016150 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
 * \brief Definitions which are precision-neutral
 *
 * 
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * November 1, 2007
 *
 * Modified:
 *     February 20, 2008
 *     October 11, 2014
 * 
*/ #ifndef __SUPERLU_DEFS /* allow multiple inclusions */ #define __SUPERLU_DEFS /* * File name: superlu_defs.h * Purpose: Definitions which are precision-neutral */ #ifdef _CRAY #include #endif #ifdef _OPENMP #include #endif #include #include #include #include #include /* Following is for vtune */ #if 0 #include #define USE_VTUNE #endif /************************************************************************* * Constants **************************************************************************/ /* * You can support older version of SuperLU_DIST. * At compile-time, you can catch the new release as: * #ifdef SUPERLU_DIST_MAIN_VERSION == 5 * use the new interface * #else * use the old interface * #endif * Versions 4.x and earlier do not include a #define'd version numbers. */ #define SUPERLU_DIST_MAJOR_VERSION 5 #define SUPERLU_DIST_MINOR_VERSION 3 #define SUPERLU_DIST_PATCH_VERSION 0 #define SUPERLU_DIST_RELEASE_DATE "January 28, 2018" #include "superlu_dist_config.h" /* Define my integer size int_t */ #ifdef _CRAY typedef short int_t; /*#undef int Revert back to int of default size. */ #define mpi_int_t MPI_SHORT #elif defined (_LONGINT) typedef long long int int_t; #define mpi_int_t MPI_LONG_LONG_INT #define IFMT "%lld" #else /* Default */ typedef int int_t; #define mpi_int_t MPI_INT #define IFMT "%8d" #endif #include "superlu_enum_consts.h" #include "Cnames.h" #include "supermatrix.h" #include "util_dist.h" #include "psymbfact.h" #define ISORT /* NOTE: qsort() has bug on Mac */ /*********************************************************************** * Constants ***********************************************************************/ /* * For each block column of L, the index[] array contains both the row * subscripts and the integers describing the size of the blocks. 
* The organization of index[] looks like: * * [ BLOCK COLUMN HEADER (size BC_HEADER) * number of blocks * number of row subscripts, i.e., LDA of nzval[] * BLOCK 0 <---- * BLOCK DESCRIPTOR (of size LB_DESCRIPTOR) | * block number (global) | * number of full rows in the block | * actual row subscripts | * BLOCK 1 | Repeat ... * BLOCK DESCRIPTOR | number of blocks * block number (global) | * number of full rows in the block | * actual row subscripts | * . | * . | * . <---- * ] * * For each block row of U, the organization of index[] looks like: * * [ BLOCK ROW HEADER (of size BR_HEADER) * number of blocks * number of entries in nzval[] * number of entries in index[] * BLOCK 0 <---- * BLOCK DESCRIPTOR (of size UB_DESCRIPTOR) | * block number (global) | * number of nonzeros in the block | * actual fstnz subscripts | * BLOCK 1 | Repeat ... * BLOCK DESCRIPTOR | number of blocks * block number (global) | * number of nonzeros in the block | * actual fstnz subscripts | * . | * . | * . <---- * ] * */ #define BC_HEADER 2 #define LB_DESCRIPTOR 2 #define BR_HEADER 3 #define UB_DESCRIPTOR 2 #define NBUFFERS 5 /* * Communication tags */ /* Return the mpi_tag assuming 5 pairs of communications and MPI_TAG_UB >= 5 * * for each supernodal column "num", the five communications are: * * 0,1: for sending L to "right" * * 2,3: for sending off-diagonal blocks of U "down" * * 4 : for sending the diagonal blcok down (in pxgstrf2) */ #define SLU_MPI_TAG(id,num) ( (5*(num)+id) % tag_ub ) /* For numeric factorization. */ #if 0 #define NTAGS 10000 #else #define NTAGS INT_MAX #endif #define UjROW 10 #define UkSUB 11 #define UkVAL 12 #define LkSUB 13 #define LkVAL 14 #define LkkDIAG 15 /* For triangular solves. */ #define XK_H 2 /* The header preceding each X block. */ #define LSUM_H 2 /* The header preceding each MOD block. 
*/ #define GSUM 20 #define Xk 21 #define Yk 22 #define LSUM 23 /* * Communication scopes */ #define COMM_ALL 100 #define COMM_COLUMN 101 #define COMM_ROW 102 /* * Matrix distribution for sparse matrix-vector multiplication */ #define SUPER_LINEAR 11 #define SUPER_BLOCK 12 /* * No of marker arrays used in the symbolic factorization, each of size n */ #define NO_MARKER 3 /*********************************************************************** * Macros ***********************************************************************/ #define IAM(comm) { int rank; MPI_Comm_rank ( comm, &rank ); rank}; #define MYROW(iam,grid) ( (iam) / grid->npcol ) #define MYCOL(iam,grid) ( (iam) % grid->npcol ) #define BlockNum(i) ( supno[i] ) #define FstBlockC(bnum) ( xsup[bnum] ) #define SuperSize(bnum) ( xsup[bnum+1]-xsup[bnum] ) #define LBi(bnum,grid) ( (bnum)/grid->nprow )/* Global to local block rowwise */ #define LBj(bnum,grid) ( (bnum)/grid->npcol )/* Global to local block columnwise*/ #define PROW(bnum,grid) ( (bnum) % grid->nprow ) #define PCOL(bnum,grid) ( (bnum) % grid->npcol ) #define PNUM(i,j,grid) ( (i)*grid->npcol + j ) /* Process number at coord(i,j) */ #define CEILING(a,b) ( ((a)%(b)) ? 
((a)/(b) + 1) : ((a)/(b)) ) /* For triangular solves */ #define RHS_ITERATE(i) \ for (i = 0; i < nrhs; ++i) #define X_BLK(i) \ ilsum[i] * nrhs + (i+1) * XK_H #define LSUM_BLK(i) \ ilsum[i] * nrhs + (i+1) * LSUM_H #define SuperLU_timer_ SuperLU_timer_dist_ #define LOG2(x) (log10((double) x) / log10(2.0)) #if ( VAMPIR>=1 ) #define VT_TRACEON VT_traceon() #define VT_TRACEOFF VT_traceoff() #else #define VT_TRACEON #define VT_TRACEOFF #endif /* Support Windows */ #ifndef SUPERLU_DIST_EXPORT #if MSVC #ifdef SUPERLU_DIST_EXPORTS #define SUPERLU_DIST_EXPORT __declspec(dllexport) #else #define SUPERLU_DIST_EXPORT __declspec(dllimport) #endif /* SUPERLU_DIST_EXPORTS */ #else #define SUPERLU_DIST_EXPORT #endif /* MSVC */ #endif /* SUPERLU_DIST_EXPORT */ /*********************************************************************** * New data types ***********************************************************************/ /* * Define the 2D mapping of matrix blocks to process grid. * * Process grid: * Processes are numbered (0 : P-1). * P = Pr x Pc, where Pr, Pc are the number of process rows and columns. * (pr,pc) is the coordinate of IAM; 0 <= pr < Pr, 0 <= pc < Pc. * * Matrix blocks: * Matrix is partitioned according to supernode partitions, both * column and row-wise. * The k-th block columns (rows) contains columns (rows) (s:t), where * s=xsup[k], t=xsup[k+1]-1. * Block A(I,J) contains * rows from (xsup[I]:xsup[I+1]-1) and * columns from (xsup[J]:xsup[J+1]-1) * * Mapping of matrix entry (i,j) to matrix block (I,J): * (I,J) = ( supno[i], supno[j] ) * * Mapping of matrix block (I,J) to process grid (pr,pc): * (pr,pc) = ( MOD(I,NPROW), MOD(J,NPCOL) ) * * (xsup[nsupers],supno[n]) are replicated on all processors. 
* */ /*-- Communication subgroup */ typedef struct { MPI_Comm comm; /* MPI communicator */ int Np; /* number of processes */ int Iam; /* my process number */ } superlu_scope_t; /*-- Process grid definition */ typedef struct { MPI_Comm comm; /* MPI communicator */ superlu_scope_t rscp; /* process scope in rowwise, horizontal directon */ superlu_scope_t cscp; /* process scope in columnwise, vertical direction */ int iam; /* my process number in this scope */ int_t nprow; /* number of process rows */ int_t npcol; /* number of process columns */ } gridinfo_t; /* *-- The structures are determined by SYMBFACT and used thereafter. * * (xsup,supno) describes mapping between supernode and column: * xsup[s] is the leading column of the s-th supernode. * supno[i] is the supernode no to which column i belongs; * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) * xsup 0 1 2 4 7 12 * Note: dfs will be performed on supernode rep. relative to the new * row pivoting ordering * * This is allocated during symbolic factorization SYMBFACT. */ typedef struct { int_t *xsup; int_t *supno; } Glu_persist_t; /* *-- The structures are determined by SYMBFACT and used by DDISTRIBUTE. * * (xlsub,lsub): lsub[*] contains the compressed subscript of * rectangular supernodes; xlsub[j] points to the starting * location of the j-th column in lsub[*]. Note that xlsub * is indexed by column. * Storage: original row subscripts * * During the course of sparse LU factorization, we also use * (xlsub,lsub) for the purpose of symmetric pruning. For each * supernode {s,s+1,...,t=s+r} with first column s and last * column t, the subscript set * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 * is the structure of column s (i.e. structure of this supernode). * It is used for the storage of numerical values. * Furthermore, * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 * is the structure of the last column t of this supernode. * It is for the purpose of symmetric pruning. 
Therefore, the * structural subscripts can be rearranged without making physical * interchanges among the numerical values. * * However, if the supernode has only one column, then we * only keep one set of subscripts. For any subscript interchange * performed, similar interchange must be done on the numerical * values. * * The last column structures (for pruning) will be removed * after the numercial LU factorization phase. * * (xusub,usub): xusub[i] points to the starting location of column i * in usub[]. For each U-segment, only the row index of first nonzero * is stored in usub[]. * * Each U column consists of a number of full segments. Each full segment * starts from a leading nonzero, running up to the supernode (block) * boundary. (Recall that the column-wise supernode partition is also * imposed on the rows.) Because the segment is full, we don't store all * the row indices. Instead, only the leading nonzero index is stored. * The rest can be found together with xsup/supno pair. * For example, * usub[xsub[j+1]] - usub[xsub[j]] = number of segments in column j. * for any i in usub[], * supno[i] = block number in which i belongs to * xsup[supno[i]+1] = first row of the next block * The nonzeros of this segment are: * i, i+1 ... xsup[supno[i]+1]-1 (only i is stored in usub[]) * */ typedef struct { int_t *lsub; /* compressed L subscripts */ int_t *xlsub; int_t *usub; /* compressed U subscripts */ int_t *xusub; int_t nzlmax; /* current max size of lsub */ int_t nzumax; /* " " " usub */ LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ int_t *llvl; /* keep track of level in L for level-based ILU */ int_t *ulvl; /* keep track of level in U for level-based ILU */ } Glu_freeable_t; /* *-- The structure used to store matrix A of the linear system and * several vectors describing the transformations done to matrix A. * * A (SuperMatrix*) * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). * The number of linear equations is A->nrow. 
The type of A can be: * Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. * * DiagScale (DiagScale_t) * Specifies the form of equilibration that was done. * = NOEQUIL: No equilibration. * = ROW: Row equilibration, i.e., A was premultiplied by diag(R). * = COL: Column equilibration, i.e., A was postmultiplied by diag(C). * = BOTH: Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * * R double*, dimension (A->nrow) * The row scale factors for A. * If DiagScale = ROW or BOTH, A is multiplied on the left by diag(R). * If DiagScale = NOEQUIL or COL, R is not defined. * * C double*, dimension (A->ncol) * The column scale factors for A. * If DiagScale = COL or BOTH, A is multiplied on the right by diag(C). * If DiagScale = NOEQUIL or ROW, C is not defined. * * perm_r (int*) dimension (A->nrow) * Row permutation vector which defines the permutation matrix Pr, * perm_r[i] = j means row i of A is in position j in Pr*A. * * perm_c (int*) dimension (A->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * */ typedef struct { DiagScale_t DiagScale; double *R; double *C; int_t *perm_r; int_t *perm_c; } ScalePermstruct_t; /*-- Data structure for redistribution of B and X --*/ typedef struct { int *B_to_X_SendCnt; int *X_to_B_SendCnt; int *ptr_to_ibuf, *ptr_to_dbuf; /* the following are needed in the hybrid solver PDSLin */ int *X_to_B_iSendCnt; int *X_to_B_vSendCnt; int *disp_ibuf; int_t *send_ibuf; void *send_dbuf; int_t x2b, b2x; int_t *send_ibuf2; int_t *recv_ibuf2; void *send_dbuf2; void *recv_dbuf2; } pxgstrs_comm_t; /* *-- This contains the options used to control the solution process. * * Fact (fact_t) * Specifies whether or not the factored form of the matrix * A is supplied on entry, and if not, how the matrix A should * be factorizaed. * = DOFACT: The matrix A will be factorized from scratch, and the * factors will be stored in L and U. 
* = SamePattern: The matrix A will be factorized assuming * that a factorization of a matrix with the same sparsity * pattern was performed prior to this one. Therefore, this * factorization will reuse column permutation vector * ScalePermstruct->perm_c and the column elimination tree * LUstruct->etree. * = SamePattern_SameRowPerm: The matrix A will be factorized * assuming that a factorization of a matrix with the same * sparsity pattern and similar numerical values was performed * prior to this one. Therefore, this factorization will reuse * both row and column scaling factors R and C, both row and * column permutation vectors perm_r and perm_c, and the * data structure set up from the previous symbolic factorization. * = FACTORED: On entry, L, U, perm_r and perm_c contain the * factored form of A. If DiagScale is not NOEQUIL, the matrix * A has been equilibrated with scaling factors R and C. * * Equil (yes_no_t) * Specifies whether to equilibrate the system (scale A's row and * columns to have unit norm). * * ColPerm (colperm_t) * Specifies what type of column permutation to use to reduce fill. * = NATURAL: use the natural ordering * = MMD_ATA: use minimum degree ordering on structure of A'*A * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A * = COLAMD: use approximate minimum degree column ordering * = MY_PERMC: use the ordering specified by the user * * Trans (trans_t) * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A**T * X = B (Transpose) * = CONJ: A**H * X = B (Transpose) * * IterRefine (IterRefine_t) * Specifies whether to perform iterative refinement. 
* = NO: no iterative refinement * = SINGLE: perform iterative refinement in single precision * = DOUBLE: perform iterative refinement in double precision * = EXTRA: perform iterative refinement in extra precision * * DiagPivotThresh (double, in [0.0, 1.0]) (only for serial SuperLU) * Specifies the threshold used for a diagonal entry to be an * acceptable pivot. * * SymmetricMode (yest_no_t) (only for serial SuperLU) * Specifies whether to use symmetric mode. Symmetric mode gives * preference to diagonal pivots, and uses an (A'+A)-based column * permutation algorithm. * * PivotGrowth (yes_no_t) (only for serial SuperLU) * Specifies whether to compute the reciprocal pivot growth. * * ConditionNumber (ues_no_t) (only for serial SuperLU) * Specifies whether to compute the reciprocal condition number. * * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU in serial SuperLU) * Specifies whether to permute rows of the original matrix. * = NO: not to permute the rows * = LargeDiag: make the diagonal large relative to the off-diagonal * = MY_PERMR: use the permutation given by the user * * ILU_DropRule (int) (only for serial SuperLU) * Specifies the dropping rule: * = DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau). * = DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n. * = DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column, * p = gamma * nnz(A(:,j)). * = DROP_AREA: Variation of ILUTP, for j-th column, use * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory. * = DROP_DYNAMIC: Modify the threshold tau during factorizaion: * If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma * tau_L(j) := MIN(tau_0, tau_L(j-1) * 2); * Otherwise * tau_L(j) := MAX(tau_0, tau_L(j-1) / 2); * tau_U(j) uses the similar rule. * NOTE: the thresholds used by L and U are separate. * = DROP_INTERP: Compute the second dropping threshold by * interpolation instead of sorting (default). * In this case, the actual fill ratio is not * guaranteed to be smaller than gamma. 
* Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive. * ( Default: DROP_BASIC | DROP_AREA ) * * ILU_DropTol (double) (only for serial SuperLU) * numerical threshold for dropping. * * ILU_FillFactor (double) (only for serial SuperLU) * Gamma in the secondary dropping. * * ILU_Norm (norm_t) (only for serial SuperLU) * Specify which norm to use to measure the row size in a * supernode: infinity-norm, 1-norm, or 2-norm. * * ILU_FillTol (double) (only for serial SuperLU) * numerical threshold for zero pivot perturbation. * * ILU_MILU (milu_t) (only for serial SuperLU) * Specifies which version of MILU to use. * * ILU_MILU_Dim (double) * Dimension of the PDE if available. * * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST) * Specifies whether to replace the tiny diagonals by * sqrt(epsilon)*||A|| during LU factorization. * * SolveInitialized (yes_no_t) (only for SuperLU_DIST) * Specifies whether the initialization has been performed to the * triangular solve. * * RefineInitialized (yes_no_t) (only for SuperLU_DIST) * Specifies whether the initialization has been performed to the * sparse matrix-vector multiplication routine needed in iterative * refinement. * * num_lookaheads (int) (only for SuperLU_DIST) * Specifies the number of levels in the look-ahead factorization * * lookahead_etree (yes_no_t) (only for SuperLU_DIST) * Specifies whether to use the elimination tree computed from the * serial symbolic factorization to perform scheduling. * * SymPattern (yes_no_t) (only for SuperLU_DIST) * Gives the scheduling algorithm a hint whether the matrix * would have symmetric pattern. 
* */ typedef struct { fact_t Fact; yes_no_t Equil; colperm_t ColPerm; trans_t Trans; IterRefine_t IterRefine; double DiagPivotThresh; yes_no_t SymmetricMode; yes_no_t PivotGrowth; yes_no_t ConditionNumber; rowperm_t RowPerm; int ILU_DropRule; double ILU_DropTol; /* threshold for dropping */ double ILU_FillFactor; /* gamma in the secondary dropping */ norm_t ILU_Norm; /* infinity-norm, 1-norm, or 2-norm */ double ILU_FillTol; /* threshold for zero pivot perturbation */ milu_t ILU_MILU; double ILU_MILU_Dim; /* Dimension of PDE (if available) */ yes_no_t ParSymbFact; yes_no_t ReplaceTinyPivot; /* used in SuperLU_DIST */ yes_no_t SolveInitialized; yes_no_t RefineInitialized; yes_no_t PrintStat; int nnzL, nnzU; /* used to store nnzs for now */ int num_lookaheads; /* num of levels in look-ahead */ yes_no_t lookahead_etree; /* use etree computed from the serial symbolic factorization */ yes_no_t SymPattern; /* symmetric factorization */ } superlu_dist_options_t; typedef struct { float for_lu; float total; int_t expansions; long long int nnzL, nnzU; } superlu_dist_mem_usage_t; /* *-- The new structures added in the hybrid CUDA + OpenMP + MPI code. 
*/ typedef struct { int_t rukp; int_t iukp; int_t jb; int_t full_u_cols; } Ublock_info_t; typedef struct { int_t lptr; int_t ib; int_t FullRow; } Remain_info_t; typedef struct { int id, key; void *next; } etree_node; struct superlu_pair { int ind; int val; }; /**--------**/ /*********************************************************************** * Function prototypes ***********************************************************************/ #ifdef __cplusplus extern "C" { #endif extern void set_default_options_dist(superlu_dist_options_t *); extern void superlu_gridinit(MPI_Comm, int_t, int_t, gridinfo_t *); extern void superlu_gridmap(MPI_Comm, int_t, int_t, int_t [], int_t, gridinfo_t *); extern void superlu_gridexit(gridinfo_t *); extern void print_options_dist(superlu_dist_options_t *); extern void print_sp_ienv_dist(superlu_dist_options_t *); extern void Destroy_CompCol_Matrix_dist(SuperMatrix *); extern void Destroy_SuperNode_Matrix_dist(SuperMatrix *); extern void Destroy_SuperMatrix_Store_dist(SuperMatrix *); extern void Destroy_CompCol_Permuted_dist(SuperMatrix *); extern void Destroy_CompRowLoc_Matrix_dist(SuperMatrix *); extern void Destroy_CompRow_Matrix_dist(SuperMatrix *); extern void sp_colorder (superlu_dist_options_t*, SuperMatrix*, int_t*, int_t*, SuperMatrix*); extern int sp_symetree_dist(int_t *, int_t *, int_t *, int_t, int_t *); extern int sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *); extern void get_perm_c_dist(int_t, int_t, SuperMatrix *, int_t *); extern void at_plus_a_dist(const int_t, const int_t, int_t *, int_t *, int_t *, int_t **, int_t **); extern int genmmd_dist_(int_t *, int_t *, int_t *a, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *); extern void bcast_tree(void *, int, MPI_Datatype, int, int, gridinfo_t *, int, int *); extern int_t symbfact(superlu_dist_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *); extern int_t symbfact_SubInit(fact_t, 
void *, int_t, int_t, int_t, int_t, Glu_persist_t *, Glu_freeable_t *); extern int_t symbfact_SubXpand(int_t, int_t, int_t, MemType, int_t *, Glu_freeable_t *); extern int_t symbfact_SubFree(Glu_freeable_t *); extern void countnz_dist (const int_t, int_t *, long long int *, long long int *, Glu_persist_t *, Glu_freeable_t *); extern long long int fixupL_dist (const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *); extern int_t *TreePostorder_dist (int_t, int_t *); extern float smach_dist(char *); extern double dmach_dist(char *); extern void *superlu_malloc_dist (size_t); extern void superlu_free_dist (void*); extern int_t *intMalloc_dist (int_t); extern int_t *intCalloc_dist (int_t); extern int_t mc64id_dist(int_t *); extern void arrive_at_ublock (int_t, int_t *, int_t *, int_t *, int_t *, int_t *, int_t, int_t, int_t *, int_t *, int_t *, gridinfo_t *); extern int_t estimate_bigu_size (int_t, int_t, int_t **, Glu_persist_t *, gridinfo_t *, int_t *); /* Auxiliary routines */ extern double SuperLU_timer_ (); extern void superlu_abort_and_exit_dist(char *); extern int_t sp_ienv_dist (int_t); extern void ifill_dist (int_t *, int_t, int_t); extern void super_stats_dist (int_t, int_t *); extern void ScalePermstructInit(const int_t, const int_t, ScalePermstruct_t *); extern void ScalePermstructFree(ScalePermstruct_t *); extern void get_diag_procs(int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **); extern int_t QuerySpace_dist(int_t, int_t, Glu_freeable_t *, superlu_dist_mem_usage_t *); extern int xerr_dist (char *, int *); extern void pxerr_dist (char *, gridinfo_t *, int_t); extern void PStatInit(SuperLUStat_t *); extern void PStatFree(SuperLUStat_t *); extern void PStatPrint(superlu_dist_options_t *, SuperLUStat_t *, gridinfo_t *); extern void log_memory(long long, SuperLUStat_t *); extern void print_memorylog(SuperLUStat_t *, char *); extern int superlu_dist_GetVersionNumber(int *, int *, int *); /* Prototypes for parallel symbolic 
factorization */ extern float symbfact_dist (int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *, Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, superlu_dist_mem_usage_t *); /* Get the column permutation using parmetis */ extern float get_perm_c_parmetis (SuperMatrix *, int_t *, int_t *, int, int, int_t **, int_t **, gridinfo_t *, MPI_Comm *); /* Auxiliary routines for memory expansions used during the parallel symbolic factorization routine */ extern int_t psymbfact_LUXpandMem (int_t, int_t, int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *); extern int_t psymbfact_LUXpand (int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *); extern int_t psymbfact_LUXpand_RL (int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *); extern int_t psymbfact_prLUXpand (int_t, int_t, int, Llu_symbfact_t *, psymbfact_stat_t *); #ifdef GPU_ACC /* GPU related */ extern void gemm_division_cpu_gpu (int *, int *, int *, int, int, int, int *, int); extern int_t get_cublas_nb (); extern int_t get_num_cuda_streams (); #endif extern int get_thread_per_process(); extern int_t get_max_buffer_size (); extern int_t get_min (int_t *, int_t); extern int compare_pair (const void *, const void *); extern int_t static_partition (struct superlu_pair *, int_t, int_t *, int_t, int_t *, int_t *, int); /* Routines for debugging */ extern void print_panel_seg_dist(int_t, int_t, int_t, int_t, int_t *, int_t *); extern void check_repfnz_dist(int_t, int_t, int_t, int_t *); extern int_t CheckZeroDiagonal(int_t, int_t *, int_t *, int_t *); extern void PrintDouble5(char *, int_t, double *); extern void PrintInt10(char *, int_t, int_t *); extern void PrintInt32(char *, int, int *); extern int file_PrintInt10(FILE *, char *, int_t, int_t *); extern int file_PrintInt32(FILE *, char *, 
int, int *); extern int file_PrintLong10(FILE *, char *, int_t, int_t *); #ifdef __cplusplus } #endif #endif /* __SUPERLU_DEFS */ SuperLU_DIST_5.3.0/SRC/zreadtriple.c0000644013363400111340000001026413233431301015757 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * */ #include #include "superlu_zdefs.h" #undef EXPAND_SYM /*! brief * *
 * Output parameters
 * =================
 *   (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of
 *      nonzeros in columns of matrix A; (*nzval)[*] the numerical values;
 *      column i of A is given by (*nzval)[k], k = (*colptr)[i],...,
 *      (*colptr)[i+1]-1.
 * 
*/ void zreadtriple_dist(FILE *fp, int_t *m, int_t *n, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { int_t j, k, jsize, nnz, nz, new_nonz; doublecomplex *a, *val; int_t *asub, *xa, *row, *col; int_t zero_base = 0; /* File format: * First line: #rows #non-zero * Triplet in the rest of lines: * row col value */ #ifdef _LONGINT fscanf(fp, "%ld%ld%ld", m, n, nonz); #else fscanf(fp, "%d%d%d", m, n, nonz); #endif #ifdef EXPAND_SYM new_nonz = 2 * *nonz - *n; #else new_nonz = *nonz; #endif *m = *n; printf("m %lld, n %lld, nonz %lld\n", (long long) *m, (long long) *n, (long long) *nonz); zallocateA_dist(*n, new_nonz, nzval, rowind, colptr); /* Allocate storage */ a = *nzval; asub = *rowind; xa = *colptr; if ( !(val = (doublecomplex *) SUPERLU_MALLOC(new_nonz * sizeof(doublecomplex))) ) ABORT("Malloc fails for val[]"); if ( !(row = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for row[]"); if ( !(col = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for col[]"); for (j = 0; j < *n; ++j) xa[j] = 0; /* Read into the triplet array from a file */ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { #ifdef _LONGINT fscanf(fp, "%ld%ld%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); #else fscanf(fp, "%d%d%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); #endif if ( nnz == 0 ) /* first nonzero */ if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else printf("triplet file: row/col indices are one-based.\n"); if ( !zero_base ) { /* Change to 0-based indexing. 
*/ --row[nz]; --col[nz]; } if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n /*|| val[nz] == 0.*/) { fprintf(stderr, "nz " IFMT ", (" IFMT ", " IFMT ") = {%e\t%e} out of bound, removed\n", nz, row[nz], col[nz], val[nz].r, val[nz].i); exit(-1); } else { ++xa[col[nz]]; #ifdef EXPAND_SYM if ( row[nz] != col[nz] ) { /* Excluding diagonal */ ++nz; row[nz] = col[nz-1]; col[nz] = row[nz-1]; val[nz] = val[nz-1]; ++xa[col[nz]]; } #endif ++nz; } } *nonz = nz; #ifdef EXPAND_SYM printf("new_nonz after symmetric expansion:\t%d\n", *nonz); #endif /* Initialize the array of column pointers */ k = 0; jsize = xa[0]; xa[0] = 0; for (j = 1; j < *n; ++j) { k += jsize; jsize = xa[j]; xa[j] = k; } /* Copy the triplets into the column oriented storage */ for (nz = 0; nz < *nonz; ++nz) { j = col[nz]; k = xa[j]; asub[k] = row[nz]; a[k] = val[nz]; ++xa[j]; } /* Reset the column pointers to the beginning of each column */ for (j = *n; j > 0; --j) xa[j] = xa[j-1]; xa[0] = 0; SUPERLU_FREE(val); SUPERLU_FREE(row); SUPERLU_FREE(col); #ifdef CHK_INPUT int i; for (i = 0; i < *n; i++) { printf("Col %d, xa %d\n", i, xa[i]); for (k = xa[i]; k < xa[i+1]; k++) printf("%d\t%16.10f\n", asub[k], a[k]); } #endif } void zreadrhs(int m, doublecomplex *b) { FILE *fp, *fopen(); int i; if ( !(fp = fopen("b.dat", "r")) ) { fprintf(stderr, "zreadrhs: file does not exist\n"); exit(-1); } for (i = 0; i < m; ++i) fscanf(fp, "%lf%lf\n", &(b[i].r), &(b[i].i)); fclose(fp); } SuperLU_DIST_5.3.0/SRC/pxerr_dist.c0000644013363400111340000000146613233431301015621 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * *
 * -- Distributed SuperLU routine (version 4.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Modified: November 21, 1999
 *
 * 
*/
#include "superlu_ddefs.h"

/*
 * pxerr_dist -- report an illegal argument, in the manner of PXERBLA.
 *
 *   srname  (input) name of the routine that detected the error.
 *   grid    (input) process grid; grid->iam is passed to MYROW/MYCOL to
 *                   obtain the two coordinates printed in braces.
 *   info    (input) number printed as the offending parameter index.
 *
 * The routine only prints to stdout; it does not abort or return a status.
 */
void pxerr_dist(char *srname, gridinfo_t *grid, int_t info)
{
    printf("{" IFMT "," IFMT "}: On entry to %6s, parameter number " IFMT " had an illegal value\n",
	   MYROW(grid->iam, grid), MYCOL(grid->iam, grid), srname, info);
}
SuperLU_DIST_5.3.0/SRC/supermatrix.h0000644013363400111340000001756013233431301016030 0ustar xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
 * \brief Matrix type definitions
 */
#ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */
#define __SUPERLU_SUPERMATRIX

/********************************************
 * The matrix types are defined as follows. *
 ********************************************/

/* Storage scheme: selects which *format struct SuperMatrix.Store points to. */
typedef enum {
    SLU_NC,    /* column-wise, no supernode */
    SLU_NCP,   /* column-wise, column-permuted, no supernode
                  (The consecutive columns of nonzeros, after permutation,
                   may not be stored contiguously.) */
    SLU_NR,    /* row-wise, no supernode */
    SLU_SC,    /* column-wise, supernode */
    SLU_SCP,   /* supernode, column-wise, permuted */
    SLU_SR,    /* row-wise, supernode */
    SLU_DN,     /* Fortran style column-wise storage for dense matrix */
    SLU_NR_loc  /* distributed compressed row format  */
} Stype_t;

/* Numerical type of the stored values. */
typedef enum {
    SLU_S,     /* single */
    SLU_D,     /* double */
    SLU_C,     /* single complex */
    SLU_Z      /* double complex */
} Dtype_t;

/* Mathematical structure of the matrix. */
typedef enum {
    SLU_GE,    /* general */
    SLU_TRLU,  /* lower triangular, unit diagonal */
    SLU_TRUU,  /* upper triangular, unit diagonal */
    SLU_TRL,   /* lower triangular */
    SLU_TRU,   /* upper triangular */
    SLU_SYL,   /* symmetric, store lower half */
    SLU_SYU,   /* symmetric, store upper half */
    SLU_HEL,   /* Hermitian, store lower half */
    SLU_HEU    /* Hermitian, store upper half */
} Mtype_t;

/* Generic matrix handle: three tags plus an untyped pointer to the storage
   struct selected by Stype. */
typedef struct {
	Stype_t Stype; /* Storage type: interprets the storage structure
		   	  pointed to by *Store. */
	Dtype_t Dtype; /* Data type. */
	Mtype_t Mtype; /* Matrix type: describes the mathematical property of
			  the matrix. */
	int_t  nrow;   /* number of rows */
	int_t  ncol;   /* number of columns */
	void *Store;   /* pointer to the actual storage of the matrix */
} SuperMatrix;

/***********************************************
 * The storage schemes are defined as follows. *
 ***********************************************/

/* Stype == SLU_NC (Also known as Harwell-Boeing sparse matrix format) */
typedef struct {
    int_t  nnz;	    /* number of nonzeros in the matrix */
    void *nzval;    /* pointer to array of nonzero values, packed by column */
    int_t  *rowind; /* pointer to array of row indices of the nonzeros */
    int_t  *colptr; /* pointer to array of beginning of columns in nzval[]
		       and rowind[]  */
                    /* Note:
		       Zero-based indexing is used;
		       colptr[] has ncol+1 entries, the last one pointing
		       beyond the last column, so that colptr[ncol] = nnz. */
} NCformat;

/* Stype == SLU_NR */
typedef struct {
    int_t  nnz;	    /* number of nonzeros in the matrix */
    void *nzval;    /* pointer to array of nonzero values, packed by row */
    int_t  *colind; /* pointer to array of column indices of the nonzeros */
    int_t  *rowptr; /* pointer to array of beginning of rows in nzval[]
		       and colind[]  */
                    /* Note:
		       Zero-based indexing is used;
		       rowptr[] has nrow+1 entries, the last one pointing
		       beyond the last row, so that rowptr[nrow] = nnz. */
} NRformat;

/* Stype == SLU_SC */
typedef struct {
  int_t  nnz;	     /* number of nonzeros in the matrix */
  int_t  nsuper;     /* number of supernodes, minus 1 */
  void *nzval;       /* pointer to array of nonzero values, packed by column */
  int_t *nzval_colptr;/* pointer to array of beginning of columns in nzval[] */
  int_t *rowind;     /* pointer to array of compressed row indices of
			rectangular supernodes */
  int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] */
  int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column
			j belongs; mapping from column to supernode number. */
  int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th
			supernode; mapping from supernode number to column.
		        e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12)
		              sup_to_col: 0 1 2 4 7 12           (nsuper=4) */
                     /* Note:
		        Zero-based indexing is used;
		        nzval_colptr[], rowind_colptr[], col_to_sup and
		        sup_to_col[] have ncol+1 entries, the last one
		        pointing beyond the last column.
		        For col_to_sup[], only the first ncol entries are
		        defined. For sup_to_col[], only the first nsuper+2
		        entries are defined. */
} SCformat;

/* Stype == SLU_SCP */
typedef struct {
  int_t  nnz;	     /* number of nonzeros in the matrix */
  int_t  nsuper;     /* number of supernodes, minus 1 (the example below
			lists supernodes 0..4 with nsuper=4) */
  void *nzval;       /* pointer to array of nonzero values, packed by column */
  int_t  *nzval_colbeg;/* nzval_colbeg[j] points to beginning of column j
			  in nzval[] */
  int_t  *nzval_colend;/* nzval_colend[j] points to one past the last element
			  of column j in nzval[] */
  int_t  *rowind;      /* pointer to array of compressed row indices of
			  rectangular supernodes */
  int_t *rowind_colbeg;/* rowind_colbeg[j] points to beginning of column j
			  in rowind[] */
  int_t *rowind_colend;/* rowind_colend[j] points to one past the last element
			  of column j in rowind[] */
  int_t *col_to_sup;   /* col_to_sup[j] is the supernode number to which column
			  j belongs; mapping from column to supernode. */
  int_t *sup_to_colbeg; /* sup_to_colbeg[s] points to the start of the s-th
			   supernode; mapping from supernode to column.*/
  int_t *sup_to_colend; /* sup_to_colend[s] points to one past the end of the
			   s-th supernode; mapping from supernode number to
			   column.
		        e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12)
		              sup_to_colbeg: 0 1 2 4 7              (nsuper=4)
			      sup_to_colend: 1 2 4 7 12                    */
                     /* Note:
		        Zero-based indexing is used.
		        NOTE(review): the original note here named the SCformat
		        arrays (nzval_colptr[], rowind_colptr[], sup_to_col[])
		        and gave them ncol+1 entries; this struct has the
		        *_colbeg[] / *_colend[] arrays instead -- their sizes
		        need to be confirmed against the allocating code. */
} SCPformat;

/* Stype == SLU_NCP */
typedef struct {
    int_t nnz;	  /* number of nonzeros in the matrix */
    void *nzval;  /* pointer to array of nonzero values, packed by column */
    int_t *rowind;/* pointer to array of row indices of the nonzeros */
		  /* Note: nzval[]/rowind[] always have the same length */
    int_t *colbeg;/* colbeg[j] points to the beginning of column j in nzval[]
                     and rowind[]  */
    int_t *colend;/* colend[j] points to one past the last element of column
		     j in nzval[] and rowind[]  */
		  /* Note:
		     Zero-based indexing is used;
		     The consecutive columns of the nonzeros may not be
		     contiguous in storage, because the matrix has been
		     postmultiplied by a column permutation matrix. */
} NCPformat;

/* Stype == SLU_DN */
typedef struct {
    int_t lda;    /* leading dimension */
    void *nzval;  /* array of size lda*ncol to represent a dense matrix */
} DNformat;

/* Stype == SLU_NR_loc (Distributed Compressed Row Format) */
typedef struct {
    int_t nnz_loc;   /* number of nonzeros in the local submatrix */
    int_t m_loc;     /* number of rows local to this processor */
    int_t fst_row;   /* global index of the first row */
    void  *nzval;    /* pointer to array of nonzero values, packed by row */
    int_t *rowptr;   /* pointer to array of beginning of rows in nzval[]
			and colind[]  */
    int_t *colind;   /* pointer to array of column indices of the nonzeros */
                     /* Note:
			Zero-based indexing is used;
			rowptr[] has m_loc + 1 entries, the last one pointing
			beyond the last row, so that rowptr[m_loc] = nnz_loc.*/
} NRformat_loc;

#endif  /* __SUPERLU_SUPERMATRIX */
SuperLU_DIST_5.3.0/SRC/zmyblas2_dist.c0000644013363400111340000001157013233431301016221 0ustar xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
 * \brief Level 2 BLAS operations: solves and matvec, written in C
 *
 *
 * -- SuperLU routine (version 2.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * November 15, 1997
 * 
*/ /* * File name: zmyblas2.c * Purpose: * Level 2 BLAS operations: solves and matvec, written in C. * Note: * This is only used when the system lacks an efficient BLAS library. */ #include "dcomplex.h" /*! \brief * *
 * Solves a dense UNIT lower triangular system. The unit lower 
 * triangular matrix is stored in a 2D array M(1:ldm,1:ncol). 
 * The solution will be returned in the rhs vector.
 * 
*/
/*
 * zlsolve -- forward substitution with a unit lower triangular matrix.
 *
 *   ldm   (input)    leading dimension of M.
 *   ncol  (input)    order of the triangular system.
 *   M     (input)    column-major array; only entries strictly below the
 *                    diagonal are read.
 *   rhs   (in/out)   right-hand side on entry, overwritten by the solution.
 *
 * Columns are eliminated four at a time, then at most one pair; a single
 * trailing column needs no work because nothing lies below its diagonal.
 */
void
zlsolve(int ldm, int ncol, doublecomplex *M, doublecomplex *rhs)
{
    doublecomplex sA, sB, sC, sD;   /* solution entries of the current panel */
    doublecomplex prod;             /* scratch for one complex product */
    doublecomplex *diag = M;        /* diagonal entry of the panel's first column */
    doublecomplex *colA, *colB, *colC, *colD;  /* cursors running down each column */
    int jc = 0;                     /* first column of the current panel */
    int r;

    while (jc < ncol - 3) {
	/* Each cursor starts just below its own diagonal entry. */
	colA = diag + 1;
	colB = colA + ldm + 1;
	colC = colB + ldm + 1;
	colD = colC + ldm + 1;

	sA = rhs[jc];

	/* second unknown of the panel */
	zz_mult(&prod, &sA, colA); colA++;
	z_sub(&sB, &rhs[jc + 1], &prod);

	/* third unknown of the panel */
	zz_mult(&prod, &sA, colA); colA++;
	z_sub(&sC, &rhs[jc + 2], &prod);
	zz_mult(&prod, &sB, colB); colB++;
	z_sub(&sC, &sC, &prod);

	/* fourth unknown of the panel */
	zz_mult(&prod, &sA, colA); colA++;
	z_sub(&sD, &rhs[jc + 3], &prod);
	zz_mult(&prod, &sB, colB); colB++;
	z_sub(&sD, &sD, &prod);
	zz_mult(&prod, &sC, colC); colC++;
	z_sub(&sD, &sD, &prod);

	rhs[jc + 1] = sB;
	rhs[jc + 2] = sC;
	rhs[jc + 3] = sD;
	jc += 4;

	/* Push the four solved unknowns into every remaining row. */
	for (r = jc; r < ncol; r++) {
	    zz_mult(&prod, &sA, colA); colA++;
	    z_sub(&rhs[r], &rhs[r], &prod);
	    zz_mult(&prod, &sB, colB); colB++;
	    z_sub(&rhs[r], &rhs[r], &prod);
	    zz_mult(&prod, &sC, colC); colC++;
	    z_sub(&rhs[r], &rhs[r], &prod);
	    zz_mult(&prod, &sD, colD); colD++;
	    z_sub(&rhs[r], &rhs[r], &prod);
	}

	diag += 4 * ldm + 4;
    }

    if (jc < ncol - 1) {
	/* Two or three columns are left: eliminate one pair. */
	colA = diag + 1;
	colB = colA + ldm + 1;

	sA = rhs[jc];
	zz_mult(&prod, &sA, colA); colA++;
	z_sub(&sB, &rhs[jc + 1], &prod);

	rhs[jc + 1] = sB;
	jc += 2;

	for (r = jc; r < ncol; r++) {
	    zz_mult(&prod, &sA, colA); colA++;
	    z_sub(&rhs[r], &rhs[r], &prod);
	    zz_mult(&prod, &sB, colB); colB++;
	    z_sub(&rhs[r], &rhs[r], &prod);
	}
    }
}

/*! \brief
 *
 *
 * Solves a dense upper triangular system. The upper triangular matrix is
 * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
 * in the rhs vector.
 * 
*/
/*
 * zusolve -- back substitution with an upper triangular matrix.
 *
 *   ldm   (input)   leading dimension of M.
 *   ncol  (input)   order of the triangular system.
 *   M     (input)   column-major array; the diagonal and the entries above
 *                   it are read.
 *   rhs   (in/out)  right-hand side on entry, overwritten by the solution.
 */
void
zusolve(int ldm, int ncol, doublecomplex *M, doublecomplex *rhs)
{
    doublecomplex pivot_sol;   /* unknown solved in the current step */
    doublecomplex prod;        /* scratch for one complex product */
    doublecomplex *ucol;       /* start of the current column of M */
    int col, row;

    /* Work from the last column back to the first. */
    for (col = ncol - 1; col >= 0; --col) {
	ucol = M + col * ldm;

	slud_z_div(&pivot_sol, &rhs[col], &ucol[col]);   /* M(col, col) */
	rhs[col] = pivot_sol;

	/* Remove this unknown's contribution from the rows above. */
	for (row = 0; row < col; ++row) {
	    zz_mult(&prod, &pivot_sol, &ucol[row]);      /* M(row, col) */
	    z_sub(&rhs[row], &rhs[row], &prod);
	}
    }
}

/*! \brief
 *
 *
 * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
 * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
 * 
*/ void zmatvec ( int ldm, /* in -- leading dimension of M */ int nrow, /* in */ int ncol, /* in */ doublecomplex *M, /* in */ doublecomplex *vec, /* in */ doublecomplex *Mxvec /* in/out */ ) { doublecomplex vi0, vi1, vi2, vi3; doublecomplex *M0, temp; doublecomplex *Mki0, *Mki1, *Mki2, *Mki3; register int firstcol = 0; int k; M0 = &M[0]; while ( firstcol < ncol - 3 ) { /* Do 4 columns */ Mki0 = M0; Mki1 = Mki0 + ldm; Mki2 = Mki1 + ldm; Mki3 = Mki2 + ldm; vi0 = vec[firstcol++]; vi1 = vec[firstcol++]; vi2 = vec[firstcol++]; vi3 = vec[firstcol++]; for (k = 0; k < nrow; k++) { zz_mult(&temp, &vi0, Mki0); Mki0++; z_add(&Mxvec[k], &Mxvec[k], &temp); zz_mult(&temp, &vi1, Mki1); Mki1++; z_add(&Mxvec[k], &Mxvec[k], &temp); zz_mult(&temp, &vi2, Mki2); Mki2++; z_add(&Mxvec[k], &Mxvec[k], &temp); zz_mult(&temp, &vi3, Mki3); Mki3++; z_add(&Mxvec[k], &Mxvec[k], &temp); } M0 += 4 * ldm; } while ( firstcol < ncol ) { /* Do 1 column */ Mki0 = M0; vi0 = vec[firstcol++]; for (k = 0; k < nrow; k++) { zz_mult(&temp, &vi0, Mki0); Mki0++; z_add(&Mxvec[k], &Mxvec[k], &temp); } M0 += ldm; } return; } SuperLU_DIST_5.3.0/SRC/dlangs_dist.c0000644013363400111340000000643413233431301015731 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Returns the value of the one norm, the infinity norm, or the element of largest value */ /* * File name: dlangs.c * History: Modified from lapack routine DLANGE */ #include #include "superlu_ddefs.h" /*! \brief
 
    Purpose   
    =======   

    DLANGS_dist returns the value of the one norm, or the Frobenius norm, or 
    the infinity norm, or the element of largest absolute value of a 
    real matrix A.   

    Description   
    ===========   

    DLANGE returns the value   

       DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
                (   
                ( norm1(A),         NORM = '1', 'O' or 'o'   
                (   
                ( normI(A),         NORM = 'I' or 'i'   
                (   
                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'  (not implemented: aborts)

    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   

    Arguments   
    =========   

    NORM    (input) CHARACTER*1   
            Specifies the value to be returned in DLANGE as described above.   
    A       (input) SuperMatrix*
            The M by N sparse matrix A. 

   ===================================================================== 
*/ double dlangs_dist(char *norm, SuperMatrix *A) { /* Local variables */ NCformat *Astore; double *Aval; int_t i, j, irow; double value=0., sum; double *rwork; Astore = (NCformat *) A->Store; Aval = (double *) Astore->nzval; if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) { value = 0.; } else if ( strncmp(norm, "M", 1)==0 ) { /* Find max(abs(A(i,j))). */ value = 0.; for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) value = SUPERLU_MAX( value, fabs( Aval[i]) ); } else if ( strncmp(norm, "O", 1)==0 || *(unsigned char *)norm == '1') { /* Find norm1(A). */ value = 0.; for (j = 0; j < A->ncol; ++j) { sum = 0.; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) sum += fabs(Aval[i]); value = SUPERLU_MAX(value, sum); } } else if ( strncmp(norm, "I", 1)==0 ) { /* Find normI(A). */ if ( !(rwork = (double *) SUPERLU_MALLOC(A->nrow * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for rwork."); for (i = 0; i < A->nrow; ++i) rwork[i] = 0.; for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) { irow = Astore->rowind[i]; rwork[irow] += fabs(Aval[i]); } value = 0.; for (i = 0; i < A->nrow; ++i) value = SUPERLU_MAX(value, rwork[i]); SUPERLU_FREE (rwork); } else if ( strncmp(norm, "F", 1)==0 || strncmp(norm, "E", 1)==0 ) { /* Find normF(A). */ ABORT("Not implemented."); } else ABORT("Illegal norm specified."); return (value); } /* dlangs_dist */ SuperLU_DIST_5.3.0/SRC/get_perm_c_parmetis.c0000644013363400111340000007031013233431301017440 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Gets matrix permutation * *
 * -- Distributed symbolic factorization auxiliary routine  (version 2.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley - July 2003
 * INRIA France - January 2004
 * Laura Grigori
 *
 * November 1, 2007
 * 
*/ /* limits.h: the largest positive integer (INT_MAX) */ #include #include #include "superlu_dist_config.h" #ifdef HAVE_PARMETIS #include "parmetis.h" #endif #include "superlu_ddefs.h" /* * Internal protypes */ static float a_plus_at_CompRow_loc (int, int_t *, int, int_t *, int_t , int_t *, int_t *, int, int_t *, int_t *, int_t **, int_t **, gridinfo_t *); /*! \brief * *
 * Purpose
 * =======
 *
 * GET_PERM_C_PARMETIS obtains a permutation matrix Pc, by applying a
 * graph partitioning algorithm to the symmetrized graph A+A'.  The
 * multilevel graph partitioning algorithm used is the
 * ParMETIS_V3_NodeND routine available in the parallel graph
 * partitioning package parMETIS.  
 *
 * The number of independent sub-domains noDomains computed by this
 * algorithm has to be a power of 2.  Hence noDomains is the largest
 * power of 2 that does not exceed nprocs_i, where nprocs_i = nprow
 * * npcol is the number of processors used in SuperLU_DIST.
 *
 * Arguments
 * =========
 *
 * A       (input) SuperMatrix*
 *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
 *         of the linear equations is A->nrow.  Matrix A is distributed
 *         in NRformat_loc format.
 *
 * perm_r  (input) int_t*
 *         Row permutation vector of size A->nrow, which defines the 
 *         permutation matrix Pr; perm_r[i] = j means row i of A is in 
 *         position j in Pr*A.
 *
 * perm_c  (output) int_t*
 *	   Column permutation vector of size A->ncol, which defines the 
 *         permutation matrix Pc; perm_c[i] = j means column i of A is 
 *         in position j in A*Pc.
 *
 * nprocs_i (input) int
 *         Number of processors the input matrix is distributed on in a block
 *         row format.  It corresponds to number of processors used in
 *         SuperLU_DIST.
 *
 * noDomains (input) int, must be power of 2
 *         Number of independent domains to be computed by the graph
 *         partitioning algorithm.  ( noDomains <= nprocs_i )
 *
 * sizes   (output) int_t**, of size 2 * noDomains
 *         Returns pointer to an array containing the number of nodes
 *         for each sub-domain and each separator.  Separators are stored 
 *         from left to right.
 *         Memory for the array is allocated in this routine.
 *
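 * fstVtxSep (output) int_t**, of size 2 * noDomains
 *         Returns pointer to an array containing first node for each
 *         sub-domain and each separator.
 *         Memory for the array is allocated in this routine.
 *
 * grid    (input) gridinfo_t*
 *         The process grid.  Its communicator grid->comm is used for the
 *         rank query and for the collective and point-to-point
 *         communication in this routine.
 *
 * metis_comm (input) MPI_Comm*
 *         Communicator handed to ParMETIS_V3_NodeND.
 *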
 * Return value
 * ============
 *   < 0, number of bytes allocated on return from the symbolic factorization.
 *   > 0, number of bytes allocated when out of memory.
 * 
*/ float get_perm_c_parmetis (SuperMatrix *A, int_t *perm_r, int_t *perm_c, int nprocs_i, int noDomains, int_t **sizes, int_t **fstVtxSep, gridinfo_t *grid, MPI_Comm *metis_comm) { float mem; /* Memory used during this routine */ mem = 0.; #ifdef HAVE_PARMETIS NRformat_loc *Astore; int iam, p; #if 0 int *b_rowptr_int, *b_colind_int, *l_sizes_int, *dist_order_int, *vtxdist_o_int; int *options, numflag; #else /* 64-bit integers */ int_t options[4]={0,0,0,1}, numflag; #endif int_t m_loc, fst_row; int_t m, n, bnz, i, j; int_t *rowptr, *colind, *l_fstVtxSep, *l_sizes; int_t *b_rowptr, *b_colind; int_t *dist_order; int *recvcnts, *displs; /* first row index on each processor when the matrix is distributed on nprocs (vtxdist_i) or noDomains processors (vtxdist_o) */ int_t *vtxdist_i, *vtxdist_o; int_t szSep, k, noNodes; float apat_mem_l; /* memory used during the computation of the graph of A+A' */ MPI_Status status; /* Initialization. */ MPI_Comm_rank (grid->comm, &iam); n = A->ncol; m = A->nrow; if ( m != n ) ABORT("Matrix is not square"); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter get_perm_c_parmetis()"); #endif Astore = (NRformat_loc *) A->Store; m_loc = Astore->m_loc; /* number of rows local to this processor */ fst_row = Astore->fst_row; /* global index of the first row */ rowptr = Astore->rowptr; /* pointer to rows and column indices */ colind = Astore->colind; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. 
Use parMETIS ordering on A'+A with %d sub-domains.\n", noDomains); #endif numflag = 0; /* determine first row on each processor */ vtxdist_i = (int_t *) SUPERLU_MALLOC((nprocs_i+1) * sizeof(int_t)); if ( !vtxdist_i ) ABORT("SUPERLU_MALLOC fails for vtxdist_i."); vtxdist_o = (int_t *) SUPERLU_MALLOC((nprocs_i+1) * sizeof(int_t)); if ( !vtxdist_o ) ABORT("SUPERLU_MALLOC fails for vtxdist_o."); MPI_Allgather (&fst_row, 1, mpi_int_t, vtxdist_i, 1, mpi_int_t, grid->comm); vtxdist_i[nprocs_i] = m; if (noDomains == nprocs_i) { /* keep the same distribution of A */ for (p = 0; p <= nprocs_i; p++) vtxdist_o[p] = vtxdist_i[p]; } else { i = n / noDomains; j = n % noDomains; for (k = 0, p = 0; p < noDomains; p++) { vtxdist_o[p] = k; k += i; if (p < j) k++; } /* The remaining non-participating processors get the same first-row-number as the last processor. */ for (p = noDomains; p <= nprocs_i; p++) vtxdist_o[p] = k; } #if ( DEBUGlevel>=2 ) if (!iam) PrintInt10 ("vtxdist_o", nprocs_i + 1, vtxdist_o); #endif /* Compute distributed A + A' */ if ((apat_mem_l = a_plus_at_CompRow_loc(iam, perm_r, nprocs_i, vtxdist_i, n, rowptr, colind, noDomains, vtxdist_o, &bnz, &b_rowptr, &b_colind, grid)) > 0) return (apat_mem_l); mem += -apat_mem_l; /* Initialize and allocate storage for parMetis. */ (*sizes) = (int_t *) SUPERLU_MALLOC(2 * noDomains * sizeof(int_t)); if (!(*sizes)) ABORT("SUPERLU_MALLOC fails for sizes."); l_sizes = *sizes; (*fstVtxSep) = (int_t *) SUPERLU_MALLOC(2 * noDomains * sizeof(int_t)); if (!(*fstVtxSep)) ABORT("SUPERLU_MALLOC fails for fstVtxSep."); l_fstVtxSep = *fstVtxSep; m_loc = vtxdist_o[iam+1] - vtxdist_o[iam]; if ( iam < noDomains) /* dist_order is the perm returned by parMetis, distributed */ if (! (dist_order = (int_t *) SUPERLU_MALLOC(m_loc * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for dist_order."); #if 0 /* Obsolate -- now ParMETIS has 64 bit integer support. 
*/ /* ParMETIS represents the column pointers and row indices of * * the input matrix using integers. When SuperLU_DIST uses * * long int for the int_t type, then several supplementary * * copies need to be performed in order to call ParMETIS. */ #if defined (_LONGINT) l_sizes_int = (int *) SUPERLU_MALLOC(2 * noDomains * sizeof(int)); if (!(l_sizes_int)) ABORT("SUPERLU_MALLOC fails for l_sizes_int."); /* Allocate storage */ if ( !(b_rowptr_int = (int*) SUPERLU_MALLOC((m_loc+1) * sizeof(int)))) ABORT("SUPERLU_MALLOC fails for b_rowptr_int[]"); for (i = 0; i <= m_loc; i++) b_rowptr_int[i] = b_rowptr[i]; SUPERLU_FREE (b_rowptr); if ( bnz ) { if ( !(b_colind_int = (int *) SUPERLU_MALLOC( bnz * sizeof(int)))) ABORT("SUPERLU_MALLOC fails for b_colind_int[]"); for (i = 0; i < bnz; i++) b_colind_int[i] = b_colind[i]; SUPERLU_FREE (b_colind); } if ( !(vtxdist_o_int = (int *) SUPERLU_MALLOC((nprocs_i+1) * sizeof(int)))) ABORT("SUPERLU_MALLOC fails for vtxdist_o_int."); for (i = 0; i <= nprocs_i; i++) vtxdist_o_int[i] = vtxdist_o[i]; SUPERLU_FREE (vtxdist_o); #else /* Default */ vtxdist_o_int = vtxdist_o; b_rowptr_int = b_rowptr; b_colind_int = b_colind; l_sizes_int = l_sizes; #endif #endif if ( iam < noDomains) { ParMETIS_V3_NodeND(vtxdist_o, b_rowptr, b_colind, &numflag, options, dist_order, l_sizes, metis_comm); } if (bnz) SUPERLU_FREE (b_colind); SUPERLU_FREE (b_rowptr); #if 0 if ( iam < noDomains) { SUPERLU_FREE (options); } #if defined (_LONGINT) /* Copy data from dist_order_int to dist_order */ if ( iam < noDomains) { /* dist_order is the perm returned by parMetis, distributed */ if (!(dist_order = (int_t *) SUPERLU_MALLOC(m_loc * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for dist_order."); for (i = 0; i < m_loc; i++) dist_order[i] = dist_order_int[i]; SUPERLU_FREE(dist_order_int); for (i = 0; i < 2*noDomains; i++) l_sizes[i] = l_sizes_int[i]; SUPERLU_FREE(l_sizes_int); } #else dist_order = dist_order_int; #endif #endif /* Allgatherv dist_order to get perm_c */ if 
(!(displs = (int *) SUPERLU_MALLOC (nprocs_i * sizeof(int)))) ABORT ("SUPERLU_MALLOC fails for displs."); if ( !(recvcnts = (int *) SUPERLU_MALLOC (nprocs_i * sizeof(int)))) ABORT ("SUPERLU_MALLOC fails for recvcnts."); for (i = 0; i < nprocs_i; i++) recvcnts[i] = vtxdist_o[i+1] - vtxdist_o[i]; displs[0]=0; for(i=1; i < nprocs_i; i++) displs[i] = displs[i-1] + recvcnts[i-1]; MPI_Allgatherv (dist_order, m_loc, mpi_int_t, perm_c, recvcnts, displs, mpi_int_t, grid->comm); if ( iam < noDomains) { SUPERLU_FREE (dist_order); } SUPERLU_FREE (vtxdist_i); SUPERLU_FREE (vtxdist_o); SUPERLU_FREE (recvcnts); SUPERLU_FREE (displs); /* send l_sizes to every processor p >= noDomains */ if (!iam) for (p = noDomains; p < nprocs_i; p++) MPI_Send (l_sizes, 2*noDomains, mpi_int_t, p, 0, grid->comm); if (noDomains <= iam && iam < nprocs_i) MPI_Recv (l_sizes, 2*noDomains, mpi_int_t, 0, 0, grid->comm, &status); /* Determine the first node in each separator, store it in l_fstVtxSep */ for (j = 0; j < 2 * noDomains; j++) l_fstVtxSep[j] = 0; l_fstVtxSep[2*noDomains - 2] = l_sizes[2*noDomains - 2]; szSep = noDomains; i = 0; while (szSep != 1) { for (j = i; j < i + szSep; j++) { l_fstVtxSep[j] += l_sizes[j]; } for (j = i; j < i + szSep; j++) { k = i + szSep + (j-i) / 2; l_fstVtxSep[k] += l_fstVtxSep[j]; } i += szSep; szSep = szSep / 2; } l_fstVtxSep[2 * noDomains - 2] -= l_sizes[2 * noDomains - 2]; i = 2 * noDomains - 2; szSep = 1; while (i > 0) { for (j = i; j < i + szSep; j++) { k = (i - 2 * szSep) + (j-i) * 2 + 1; noNodes = l_fstVtxSep[k]; l_fstVtxSep[k] = l_fstVtxSep[j] - l_sizes[k]; l_fstVtxSep[k-1] = l_fstVtxSep[k] + l_sizes[k] - noNodes - l_sizes[k-1]; } szSep *= 2; i -= szSep; } #if ( PRNTlevel>=2 ) if (!iam ) { PrintInt10 ("Sizes of separators", 2 * noDomains-1, l_sizes); PrintInt10 ("First Vertex Separator", 2 * noDomains-1, l_fstVtxSep); } #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit get_perm_c_parmetis()"); #endif #endif /* HAVE_PARMETIS */ return (-mem); } /* 
get_perm_c_parmetis */ /*! \brief * *
 * Purpose
 * =======
 *
 * Form the structure of Pr*A +A'Pr'. A is an n-by-n matrix in
 * NRformat_loc format, represented by (rowptr, colind). The output
 * B=Pr*A +A'Pr' is in NRformat_loc format (symmetrically, also row
 * oriented), represented by (b_rowptr, b_colind).
 *
 * The input matrix A is distributed in block row format on nprocs_i
 * processors.  The output matrix B is distributed in block row format
 * on nprocs_o processors, where nprocs_o <= nprocs_i.  On output, the
 * matrix B has its rows permuted according to perm_r.
 *
 * Sketch of the algorithm
 * =======================
 *
 * Let iam be my process number.  Let fst_row, lst_row = m_loc +
 * fst_row be the first/last row stored on iam.
 * 
 * Compute Pr' - the inverse row permutation, stored in iperm_r.
 *
 * Compute the transpose  of the block row of Pr*A that iam owns:
 *    T[:,Pr(fst_row:lst_row)] = Pr' * A[:,fst_row:lst_row] * Pr'
 *
 *
 * All to all communication such that every processor iam receives all
 * the blocks of the transpose matrix that it needs, that is
 *           T[fst_row:lst_row, :]
 *
 * Compute B = A[fst_row:lst_row, :] + T[fst_row:lst_row, :]
 *
 * If Pr != I or nprocs_i != nprocs_o then permute the rows of B (that
 * is compute Pr*B) and redistribute from nprocs_i to nprocs_o
 * according to the block row distribution in vtxdist_i, vtxdist_o.
 * 
*/ static float a_plus_at_CompRow_loc ( int iam, /* Input - my processor number */ int_t *perm_r, /* Input - row permutation vector Pr */ int nprocs_i, /* Input - number of processors the input matrix is distributed on */ int_t *vtxdist_i, /* Input - index of first row on each processor of the input matrix */ int_t n, /* Input - number of columns in matrix A. */ int_t *rowptr, /* Input - row pointers of size m_loc+1 for matrix A. */ int_t *colind, /* Input - column indices of size nnz_loc for matrix A. */ int nprocs_o, /* Input - number of processors the output matrix is distributed on */ int_t *vtxdist_o, /* Input - index of first row on each processor of the output matrix */ int_t *p_bnz, /* Output - on exit, returns the actual number of local nonzeros in matrix A'+A. */ int_t **p_b_rowptr, /* Output - output matrix, row pointers of size m_loc+1 */ int_t **p_b_colind, /* Output - output matrix, column indices of size *p_bnz */ gridinfo_t *grid /* Input - grid of processors information */ ) { int_t i, j, k, col, num_nz, nprocs; int_t *tcolind_recv; /* temporary receive buffer */ int_t *tcolind_send; /* temporary send buffer */ int_t sz_tcolind_send, sz_tcolind_recv; int_t ind, ind_rcv; int redist_pra; /* TRUE if Pr != I or nprocs_i != nprocs_o */ int_t *marker, *iperm_r; int_t *sendCnts, *recvCnts; int_t *sdispls, *rdispls; int_t *b_rowptr, *b_colind, bnz_t, *b_rowptr_t, *b_colind_t; int_t p, t_ind, nelts, ipcol; int_t m_loc, m_loc_o; /* number of local rows */ int_t fst_row, fst_row_o; /* index of first local row */ int_t nnz_loc; /* number of local nonzeros in matrix A */ float apat_mem, apat_mem_max; int *intBuf1, *intBuf2, *intBuf3, *intBuf4; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter a_plus_at_CompRow_loc()"); #endif fst_row = vtxdist_i[iam]; m_loc = vtxdist_i[iam+1] - vtxdist_i[iam]; nnz_loc = rowptr[m_loc]; redist_pra = FALSE; nprocs = SUPERLU_MAX(nprocs_i, nprocs_o); apat_mem_max = 0.; if (!(marker = (int_t*) SUPERLU_MALLOC( (n+1) * sizeof(int_t)))) 
ABORT("SUPERLU_MALLOC fails for marker[]"); if (!(iperm_r = (int_t*) SUPERLU_MALLOC( n * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for iperm_r[]"); if (!(sendCnts = (int_t*) SUPERLU_MALLOC(nprocs * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for sendCnts[]"); if (!(recvCnts = (int_t*) SUPERLU_MALLOC(nprocs * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for recvCnts[]"); if (!(sdispls = (int_t*) SUPERLU_MALLOC((nprocs+1) * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for sdispls[]"); if (!(rdispls = (int_t*) SUPERLU_MALLOC((nprocs+1) * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for rdispls[]"); apat_mem = 2 * n + 4 * nprocs + 3; #if defined (_LONGINT) intBuf1 = (int *) SUPERLU_MALLOC(4 * nprocs * sizeof(int)); intBuf2 = intBuf1 + nprocs; intBuf3 = intBuf1 + 2 * nprocs; intBuf4 = intBuf1 + 3 * nprocs; apat_mem += 4*nprocs*sizeof(int) / sizeof(int_t); #endif /* compute the inverse row permutation vector */ for (i = 0; i < n; i++) { marker[i] = 1; if (perm_r[i] != i) redist_pra = TRUE; iperm_r[perm_r[i]] = i; } /* TRANSPOSE LOCAL ROWS ON MY PROCESSOR iam. */ /* THE RESULT IS STORED IN TCOLIND_SEND. */ /* THIS COUNTS FOR TWO PASSES OF THE LOCAL MATRIX. 
*/ /* First pass to get counts of each row of T, and set up column pointers */ for (j = 0; j < m_loc; j++) { for (i = rowptr[j]; i < rowptr[j+1]; i++){ marker[iperm_r[colind[i]]]++; } } /* determine number of elements to be sent to each processor */ for (p = 0; p < nprocs_i; p++) { sendCnts[p] = 0; for (i = vtxdist_i[p]; i < vtxdist_i[p+1]; i++) sendCnts[p] += marker[i]; } /* exchange send/receive counts information in between all processors */ MPI_Alltoall (sendCnts, 1, mpi_int_t, recvCnts, 1, mpi_int_t, grid->comm); sendCnts[iam] = 0; for (i = 0, j = 0, p = 0; p < nprocs_i; p++) { rdispls[p] = j; j += recvCnts[p]; sdispls[p] = i; i += sendCnts[p]; } recvCnts[iam] = 0; sz_tcolind_recv = j; sz_tcolind_send = i; /* allocate memory to receive necessary blocks of transpose matrix T */ if (sz_tcolind_recv) { if ( !(tcolind_recv = (int_t*) SUPERLU_MALLOC( sz_tcolind_recv * sizeof(int_t) ))) ABORT("SUPERLU_MALLOC fails tcolind_recv[]"); apat_mem += sz_tcolind_recv; } /* allocate memory to send blocks of local transpose matrix T to other processors */ if (sz_tcolind_send) { if (!(tcolind_send = (int_t*) SUPERLU_MALLOC( (sz_tcolind_send) * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for tcolind_send[]"); apat_mem += sz_tcolind_send; } /* Set up marker[] to point at the beginning of each row in the send/receive buffer. For each row, we store first its number of elements, and then the elements. 
*/ ind_rcv = rdispls[iam]; for (p = 0; p < nprocs_i; p++) { for (i = vtxdist_i[p]; i < vtxdist_i[p+1]; i++) { nelts = marker[i] - 1; if (p == iam) { tcolind_recv[ind_rcv] = nelts; marker[i] = ind_rcv + 1; ind_rcv += nelts + 1; } else { tcolind_send[sdispls[p]] = nelts; marker[i] = sdispls[p] + 1; sdispls[p] += nelts + 1; } } } /* reset sdispls vector */ for (i = 0, p = 0; p < nprocs_i; p++) { sdispls[p] = i; i += sendCnts[p]; } /* Second pass of the local matrix A to copy data to be sent */ for (j = 0; j < m_loc; j++) for (i = rowptr[j]; i < rowptr[j+1]; i++) { col = colind[i]; ipcol = iperm_r[col]; if (ipcol >= fst_row && ipcol < fst_row + m_loc) /* local data */ tcolind_recv[marker[ipcol]] = perm_r[j + fst_row]; else /* remote */ tcolind_send[marker[ipcol]] = perm_r[j + fst_row]; marker[ipcol] ++; } sendCnts[iam] = 0; recvCnts[iam] = 0; #if defined (_LONGINT) for (p=0; p INT_MAX || sdispls[p] > INT_MAX || recvCnts[p] > INT_MAX || rdispls[p] > INT_MAX) ABORT("ERROR in dist_symbLU size to send > INT_MAX\n"); intBuf1[p] = (int) sendCnts[p]; intBuf2[p] = (int) sdispls[p]; intBuf3[p] = (int) recvCnts[p]; intBuf4[p] = (int) rdispls[p]; } #else /* Default */ intBuf1 = sendCnts; intBuf2 = sdispls; intBuf3 = recvCnts; intBuf4 = rdispls; #endif /* send/receive transpose matrix T */ MPI_Alltoallv (tcolind_send, intBuf1, intBuf2, mpi_int_t, tcolind_recv, intBuf3, intBuf4, mpi_int_t, grid->comm); /* ------------------------------------------------------------ DEALLOCATE SEND COMMUNICATION STORAGE ------------------------------------------------------------*/ if (sz_tcolind_send) { SUPERLU_FREE( tcolind_send ); apat_mem_max = apat_mem; apat_mem -= sz_tcolind_send; } /* ---------------------------------------------------------------- FOR LOCAL ROWS: compute B = A + T, where row j of B is: Struct (B(j,:)) = Struct (A(j,:)) UNION Struct (T(j,:)) do not include the diagonal entry THIS COUNTS FOR TWO PASSES OF THE LOCAL ROWS OF A AND T. 
------------------------------------------------------------------ */ /* Reset marker to EMPTY */ for (i = 0; i < n; ++i) marker[i] = EMPTY; /* save rdispls information */ for (p = 0; p < nprocs_i; p++) sdispls[p] = rdispls[p]; /* First pass determines number of nonzeros in B */ num_nz = 0; for (j = 0; j < m_loc; j++) { /* Flag the diagonal so it's not included in the B matrix */ marker[perm_r[j + fst_row]] = j; /* Add pattern of row A(j,:) to B(j,:) */ for (i = rowptr[j]; i < rowptr[j+1]; i++) { k = colind[i]; if ( marker[k] != j ) { marker[k] = j; ++num_nz; } } /* Add pattern of row T(j,:) to B(j,:) */ for (p = 0; p < nprocs_i; p++) { t_ind = rdispls[p]; nelts = tcolind_recv[t_ind]; t_ind ++; for (i = t_ind; i < t_ind + nelts; i++) { k = tcolind_recv[i]; if ( marker[k] != j ) { marker[k] = j; ++num_nz; } } t_ind += nelts; rdispls[p] = t_ind; } } bnz_t = num_nz; /* Allocate storage for B=Pr*A+A'*Pr' */ if ( !(b_rowptr_t = (int_t*) SUPERLU_MALLOC((m_loc+1) * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for b_rowptr_t[]"); if ( bnz_t ) { if ( !(b_colind_t = (int_t*) SUPERLU_MALLOC( bnz_t * sizeof(int_t)))) ABORT("SUPERLU_MALLOC fails for b_colind_t[]"); } apat_mem += m_loc + 1 + bnz_t; if (apat_mem > apat_mem_max) apat_mem_max = apat_mem; /* Reset marker to EMPTY */ for (i = 0; i < n; i++) marker[i] = EMPTY; /* restore rdispls information */ for (p = 0; p < nprocs_i; p++) rdispls[p] = sdispls[p]; /* Second pass, compute each row of B, one at a time */ num_nz = 0; t_ind = 0; for (j = 0; j < m_loc; j++) { b_rowptr_t[j] = num_nz; /* Flag the diagonal so it's not included in the B matrix */ marker[perm_r[j + fst_row]] = j; /* Add pattern of row A(j,:) to B(j,:) */ for (i = rowptr[j]; i < rowptr[j+1]; i++) { k = colind[i]; if ( marker[k] != j ) { marker[k] = j; b_colind_t[num_nz] = k; num_nz ++; } } /* Add pattern of row T(j,:) to B(j,:) */ for (p = 0; p < nprocs_i; p++) { t_ind = rdispls[p]; nelts = tcolind_recv[t_ind]; t_ind++; for (i = t_ind; i < t_ind + nelts; i++) { 
k = tcolind_recv[i]; if ( marker[k] != j ) { marker[k] = j; b_colind_t[num_nz] = k; num_nz++; } } t_ind += nelts; rdispls[p] = t_ind; } } b_rowptr_t[m_loc] = num_nz; for (p = 0; p <= SUPERLU_MIN(nprocs_i, nprocs_o); p++) if (vtxdist_i[p] != vtxdist_o[p]) redist_pra = TRUE; if (sz_tcolind_recv) { SUPERLU_FREE (tcolind_recv); apat_mem -= sz_tcolind_recv; } SUPERLU_FREE (marker); SUPERLU_FREE (iperm_r); apat_mem -= 2 * n + 1; /* redistribute permuted matrix (by rows) from nproc_i processors to nproc_o processors */ if (redist_pra) { m_loc_o = vtxdist_o[iam+1] - vtxdist_o[iam]; fst_row_o = vtxdist_o[iam]; nnz_loc = 0; if ( !(b_rowptr = intMalloc_dist(m_loc_o + 1)) ) ABORT("Malloc fails for *b_rowptr[]."); apat_mem += m_loc_o + 1; if (apat_mem > apat_mem_max) apat_mem_max = apat_mem; for (p = 0; p < nprocs_i; p++) { sendCnts[p] = 0; recvCnts[p] = 0; } for (i = 0; i < m_loc; i++) { k = perm_r[i+fst_row]; /* find the processor to which row k belongs */ j = FALSE; p = 0; while (!j) { if (vtxdist_o[p] <= k && k < vtxdist_o[p+1]) j = TRUE; else p ++; } if (p == iam) { b_rowptr[k-fst_row_o] = b_rowptr_t[i + 1] - b_rowptr_t[i]; nnz_loc += b_rowptr[k-fst_row_o]; } else sendCnts[p] += b_rowptr_t[i + 1] - b_rowptr_t[i] + 2; } /* exchange send/receive counts information in between all processors */ MPI_Alltoall (sendCnts, 1, mpi_int_t, recvCnts, 1, mpi_int_t, grid->comm); for (i = 0, j = 0, p = 0; p < nprocs_i; p++) { rdispls[p] = j; j += recvCnts[p]; sdispls[p] = i; i += sendCnts[p]; } rdispls[p] = j; sdispls[p] = i; sz_tcolind_recv = j; sz_tcolind_send = i; /* allocate memory for local data */ tcolind_recv = NULL; tcolind_send = NULL; if (sz_tcolind_recv) { if ( !(tcolind_recv = (int_t*) SUPERLU_MALLOC( sz_tcolind_recv * sizeof(int_t) ))) ABORT("SUPERLU_MALLOC fails tcolind_recv[]"); apat_mem += sz_tcolind_recv; } /* allocate memory to receive necessary data */ if (sz_tcolind_send) { if (!(tcolind_send = (int_t*) SUPERLU_MALLOC( (sz_tcolind_send) * sizeof(int_t)))) 
ABORT("SUPERLU_MALLOC fails for tcolind_send[]"); apat_mem += sz_tcolind_send; } if (apat_mem > apat_mem_max) apat_mem_max = apat_mem; /* Copy data to be sent */ ind_rcv = rdispls[iam]; for (i = 0; i < m_loc; i++) { k = perm_r[i+fst_row]; /* find the processor to which row k belongs */ j = FALSE; p = 0; while (!j) { if (vtxdist_o[p] <= k && k < vtxdist_o[p+1]) j = TRUE; else p ++; } if (p != iam) { /* remote */ tcolind_send[sdispls[p]] = k; tcolind_send[sdispls[p]+1] = b_rowptr_t[i+1] - b_rowptr_t[i]; sdispls[p] += 2; for (j = b_rowptr_t[i]; j < b_rowptr_t[i+1]; j++) { tcolind_send[sdispls[p]] = b_colind_t[j]; sdispls[p] ++; } } } /* reset sdispls vector */ for (i = 0, p = 0; p < nprocs_i; p++) { sdispls[p] = i; i += sendCnts[p]; } sendCnts[iam] = 0; recvCnts[iam] = 0; #if defined (_LONGINT) for (p=0; p INT_MAX || sdispls[p] > INT_MAX || recvCnts[p] > INT_MAX || rdispls[p] > INT_MAX) ABORT("ERROR in dist_symbLU size to send > INT_MAX\n"); intBuf1[p] = (int) sendCnts[p]; intBuf2[p] = (int) sdispls[p]; intBuf3[p] = (int) recvCnts[p]; intBuf4[p] = (int) rdispls[p]; } #else /* Default */ intBuf1 = sendCnts; intBuf2 = sdispls; intBuf3 = recvCnts; intBuf4 = rdispls; #endif /* send/receive permuted matrix T by rows */ MPI_Alltoallv (tcolind_send, intBuf1, intBuf2, mpi_int_t, tcolind_recv, intBuf3, intBuf4, mpi_int_t, grid->comm); /* ------------------------------------------------------------ DEALLOCATE COMMUNICATION STORAGE ------------------------------------------------------------*/ if (sz_tcolind_send) { SUPERLU_FREE( tcolind_send ); apat_mem -= sz_tcolind_send; } /* ------------------------------------------------------------ STORE ROWS IN ASCENDING ORDER OF THEIR NUMBER ------------------------------------------------------------*/ for (p = 0; p < nprocs; p++) { if (p != iam) { i = rdispls[p]; while (i < rdispls[p+1]) { j = tcolind_recv[i]; nelts = tcolind_recv[i+1]; i += 2 + nelts; b_rowptr[j-fst_row_o] = nelts; nnz_loc += nelts; } } } if (nnz_loc) { if ( 
!(b_colind = intMalloc_dist(nnz_loc)) ) { ABORT("Malloc fails for bcolind[]."); apat_mem += nnz_loc; if (apat_mem > apat_mem_max) apat_mem_max = apat_mem; } } /* Initialize the array of row pointers */ k = 0; for (j = 0; j < m_loc_o; j++) { i = b_rowptr[j]; b_rowptr[j] = k; k += i; } if (m_loc_o) b_rowptr[j] = k; /* Copy the data into the row oriented storage */ for (p = 0; p < nprocs; p++) { if (p != iam) { i = rdispls[p]; while (i < rdispls[p+1]) { j = tcolind_recv[i]; nelts = tcolind_recv[i+1]; for (i += 2, k = b_rowptr[j-fst_row_o]; k < b_rowptr[j-fst_row_o+1]; i++, k++) b_colind[k] = tcolind_recv[i]; } } } for (i = 0; i < m_loc; i++) { k = perm_r[i+fst_row]; if (k >= vtxdist_o[iam] && k < vtxdist_o[iam+1]) { ind = b_rowptr[k-fst_row_o]; for (j = b_rowptr_t[i]; j < b_rowptr_t[i+1]; j++, ind++) b_colind[ind] = b_colind_t[j]; } } SUPERLU_FREE(b_rowptr_t); if ( bnz_t ) SUPERLU_FREE(b_colind_t); if (sz_tcolind_recv) SUPERLU_FREE(tcolind_recv); apat_mem -= bnz_t + m_loc + sz_tcolind_recv; *p_bnz = nnz_loc; *p_b_rowptr = b_rowptr; *p_b_colind = b_colind; } else { /* no need for redistribution */ *p_bnz = bnz_t; *p_b_rowptr = b_rowptr_t; *p_b_colind = b_colind_t; } SUPERLU_FREE (rdispls); SUPERLU_FREE (sdispls); SUPERLU_FREE (sendCnts); SUPERLU_FREE (recvCnts); apat_mem -= 4 * nprocs + 2; #if defined (_LONGINT) SUPERLU_FREE (intBuf1); apat_mem -= 4*nprocs*sizeof(int) / sizeof(int_t); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit a_plus_at_CompRow_loc()"); #endif return (- apat_mem_max * sizeof(int_t)); } /* a_plus_at_CompRow_loc */ SuperLU_DIST_5.3.0/SRC/pzgsequ.c0000644013363400111340000001650213233431301015131 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief Computes row and column scalings * * File name: pzgsequ.c * History: Modified from LAPACK routine ZGEEQU */ #include #include "superlu_zdefs.h" /*! \brief
    
    Purpose   
    =======   

    PZGSEQU computes row and column scalings intended to equilibrate an   
    M-by-N sparse matrix A and reduce its condition number. R returns the row
    scale factors and C the column scale factors, chosen to try to make   
    the largest element in each row and column of the matrix B with   
    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   

    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
    number and BIGNUM = largest safe number.  Use of these scaling   
    factors is not guaranteed to reduce the condition number of A but   
    works well in practice.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.
 
    Arguments   
    =========   

    A       (input) SuperMatrix*
            The matrix of dimension (A->nrow, A->ncol) whose equilibration
            factors are to be computed. The type of A can be:
            Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE.
	    
    R       (output) double*, size A->nrow
            If INFO = 0 or INFO > M, R contains the row scale factors   
            for A.
	    
    C       (output) double*, size A->ncol
            If INFO = 0,  C contains the column scale factors for A.
	    
    ROWCND  (output) double*
            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
            AMAX is neither too large nor too small, it is not worth   
            scaling by R.
	    
    COLCND  (output) double*
            If INFO = 0, COLCND contains the ratio of the smallest   
            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
            worth scaling by C.
	    
    AMAX    (output) double*
            Absolute value of largest matrix element.  If AMAX is very   
            close to overflow or very close to underflow, the matrix   
            should be scaled.
	    
    INFO    (output) int*
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i,  and i is   
                  <= M:  the i-th row of A is exactly zero   
                  >  M:  the (i-M)-th column of A is exactly zero   

    GRID    (input) gridinfo_t*
            The 2D process mesh.
    ===================================================================== 
*/
/*
 * Implementation notes
 * --------------------
 * This routine is collective over grid->comm: every process must call it.
 * Each process computes the row maxima of the rows it owns
 * (fst_row .. fst_row+m_loc-1) and the results are combined with MPI
 * reductions/gathers, so that on a successful exit r[] and c[] hold the
 * same global values on every process.
 *
 * Error reporting is made consistent across processes: the index of the
 * first exactly-zero row is agreed on with a MIN reduction rather than
 * derived from entries of r[] that this process never computed, and r[]
 * is gathered before the column phase so that it is complete whenever
 * INFO > M is returned.
 *
 * NOTE(review): the gather of r[] assumes the rows are distributed as
 * contiguous blocks in rank order (displacements are the running sum of
 * the per-process row counts) -- confirm against the matrix distribution.
 */
void
pzgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
        double *colcnd, double *amax, int_t *info, gridinfo_t *grid)
{
    /* Local variables */
    NRformat_loc *Astore;
    doublecomplex *Aval;
    int_t i, j;          /* row / nonzero indices; rowptr[] holds int_t */
    int_t irow, jcol;
    int_t fst_row;
    int m_loc;           /* kept as int: exchanged below with MPI_INT */
    int p;
    double rcmin, rcmax;
    double bignum, smlnum;
    double tempmax, tempmin;
    double *loc_max;
    int *r_sizes, *displs;
    double *loc_r;
    int_t procs;

    /* Test the input parameters. */
    *info = 0;
    if ( A->nrow < 0 || A->ncol < 0 ||
         A->Stype != SLU_NR_loc || A->Dtype != SLU_Z || A->Mtype != SLU_GE )
        *info = -1;
    if (*info != 0) {
        i = -(*info);
        pxerr_dist("pzgsequ", grid, i);
        return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || A->ncol == 0 ) {
        *rowcnd = 1.;
        *colcnd = 1.;
        *amax = 0.;
        return;
    }

    Astore = A->Store;
    Aval = Astore->nzval;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;

    /* Get machine constants. */
    smlnum = dmach_dist("S");
    bignum = 1. / smlnum;

    /* Compute row scale factors. */
    for (i = 0; i < A->nrow; ++i) r[i] = 0.;

    /* Find the maximum element in each (locally owned) row. */
    irow = fst_row;
    for (i = 0; i < m_loc; ++i) {
        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j)
            r[irow] = SUPERLU_MAX( r[irow], slud_z_abs1(&Aval[j]) );
        ++irow;
    }

    /* Find the maximum and minimum scale factors over the local rows. */
    rcmin = bignum;
    rcmax = 0.;
    for (i = fst_row; i < fst_row + m_loc; ++i) {
        rcmax = SUPERLU_MAX(rcmax, r[i]);
        rcmin = SUPERLU_MIN(rcmin, r[i]);
    }

    /* Get the global MAX and MIN for R */
    tempmax = rcmax;
    tempmin = rcmin;
    MPI_Allreduce( &tempmax, &rcmax, 1, MPI_DOUBLE, MPI_MAX, grid->comm);
    MPI_Allreduce( &tempmin, &rcmin, 1, MPI_DOUBLE, MPI_MIN, grid->comm);

    *amax = rcmax;

    if (rcmin == 0.) {
        /* Find the first zero scale factor and return an error code.
           rcmin is a global value, so every process takes this branch and
           the reduction below is matched on all of them.  Only the local
           rows of r[] are meaningful here; the other entries are still
           the initial 0. and must not be mistaken for zero rows. */
        int_t loc_first = A->nrow;   /* sentinel: no zero row found here */
        int_t glb_first = A->nrow;
        for (i = fst_row; i < fst_row + m_loc; ++i)
            if (r[i] == 0.) {
                loc_first = i;
                break;
            }
        MPI_Allreduce( &loc_first, &glb_first, 1, mpi_int_t, MPI_MIN,
                       grid->comm);
        *info = glb_first + 1;
        return;
    }

    /* Invert the scale factors. */
    for (i = 0; i < A->nrow; ++i)
        r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
    /* Compute ROWCND = min(R(I)) / max(R(I)) */
    *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );

    /* Gather R from each process to get the global R.  This is done before
       the column phase so that R is complete on every exit with
       INFO = 0 or INFO > M.  The locally owned entries are unchanged by
       the gather, so the column computation below sees the same values. */
    procs = grid->nprow * grid->npcol;
    if ( !(r_sizes = (int *) SUPERLU_MALLOC(2 * procs * sizeof(int))))
        ABORT("Malloc fails for r_sizes[].");
    displs = r_sizes + procs;
    if ( !(loc_r = doubleMalloc_dist(m_loc)))
        ABORT("Malloc fails for loc_r[].");
    irow = fst_row;
    for (i = 0; i < m_loc; ++i) loc_r[i] = r[irow++];

    /* First gather the size of each piece. */
    MPI_Allgather(&m_loc, 1, MPI_INT, r_sizes, 1, MPI_INT, grid->comm);

    /* Set up the displacements for allgatherv */
    displs[0] = 0;
    for (p = 1; p < procs; ++p) displs[p] = displs[p-1] + r_sizes[p-1];

    /* Now gather the actual data */
    MPI_Allgatherv(loc_r, m_loc, MPI_DOUBLE, r, r_sizes, displs,
                   MPI_DOUBLE, grid->comm);

    SUPERLU_FREE(r_sizes);
    SUPERLU_FREE(loc_r);

    /* Compute column scale factors */
    for (j = 0; j < A->ncol; ++j) c[j] = 0.;

    /* Find the maximum element in each column, assuming the row
       scalings computed above. */
    irow = fst_row;
    for (i = 0; i < m_loc; ++i) {
        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {
            jcol = Astore->colind[j];
            c[jcol] = SUPERLU_MAX( c[jcol], slud_z_abs1(&Aval[j]) * r[irow] );
        }
        ++irow;
    }

    /* Find the global maximum for c[j] */
    if ( !(loc_max = doubleMalloc_dist(A->ncol)))
        ABORT("Malloc fails for loc_max[].");
    for (j = 0; j < A->ncol; ++j) loc_max[j] = c[j];
    MPI_Allreduce(loc_max, c, A->ncol, MPI_DOUBLE, MPI_MAX, grid->comm);
    SUPERLU_FREE(loc_max);

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (j = 0; j < A->ncol; ++j) {
        rcmax = SUPERLU_MAX(rcmax, c[j]);
        rcmin = SUPERLU_MIN(rcmin, c[j]);
    }

    if (rcmin == 0.) {
        /* Find the first zero scale factor and return an error code.
           c[] is already global here, so all processes agree. */
        for (j = 0; j < A->ncol; ++j)
            if ( c[j] == 0. ) {
                *info = A->nrow + j + 1;
                return;
            }
    } else {
        /* Invert the scale factors. */
        for (j = 0; j < A->ncol; ++j)
            c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
        /* Compute COLCND = min(C(J)) / max(C(J)) */
        *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    return;

} /* pzgsequ */ SuperLU_DIST_5.3.0/SRC/pdgstrf_irecv.c0000644013363400111340000011750213233431301016276 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs LU factorization in parallel * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *
 *
 * Sketch of the algorithm
 * =======================
 *
 * The following relations hold:
 *     * A_kk = L_kk * U_kk
 *     * L_ik = Aik * U_kk^(-1)
 *     * U_kj = L_kk^(-1) * A_kj
 *
 *              ----------------------------------
 *              |   |                            |
 *              ----|-----------------------------
 *              |   | \ U_kk|                    |
 *              |   |   \   |        U_kj        |
 *              |   |L_kk \ |         ||         |
 *              ----|-------|---------||----------
 *              |   |       |         \/         |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   | L_ik ==>       A_ij        |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              ----------------------------------
 *
 * Handle the first block of columns separately.
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity. ( pdgstrf2(0), one column at a time )
 *     * Compute block row of U
 *     * Update trailing matrix
 * 
 * Loop over the remaining blocks of columns.
 *   mycol = MYCOL( iam, grid );
 *   myrow = MYROW( iam, grid );
 *   N = nsupers;
 *   For (k = 1; k < N; ++k) {
 *       krow = PROW( k, grid );
 *       kcol = PCOL( k, grid );
 *       Pkk = PNUM( krow, kcol, grid );
 *
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity.
 *       if ( mycol == kcol ) {
 *           pdgstrf2(k), one column at a time 
 *       }
 *
 *     * Parallel triangular solve
 *       if ( iam == Pkk ) multicast L_k,k to this process row;
 *       if ( myrow == krow && mycol != kcol ) {
 *          Recv L_k,k from process Pkk;
 *          for (j = k+1; j < N; ++j) 
 *              if ( PCOL( j, grid ) == mycol && A_k,j != 0 )
 *                 U_k,j = L_k,k \ A_k,j;
 *       }
 *
 *     * Parallel rank-k update
 *       if ( myrow == krow ) multicast U_k,k+1:N to this process column;
 *       if ( mycol == kcol ) multicast L_k+1:N,k to this process row;
 *       if ( myrow != krow ) {
 *          Pkj = PNUM( krow, mycol, grid );
 *          Recv U_k,k+1:N from process Pkj;
 *       }
 *       if ( mycol != kcol ) {
 *          Pik = PNUM( myrow, kcol, grid );
 *          Recv L_k+1:N,k from process Pik;
 *       }
 *       for (j = k+1; j < N; ++j) {
 *          for (i = k+1; i < N; ++i) 
 *              if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid )
 *                   && L_i,k != 0 && U_k,j != 0 )
 *                 A_i,j = A_i,j - L_i,k * U_k,j;
 *       }
 *  }
 *
 *
 * Remaining issues
 *   (1) Use local indices for L subscripts and SPA.  [DONE]
 * 
*/ #include #include "superlu_ddefs.h" #if ( VAMPIR>=1 ) #include #endif /* * Internal prototypes */ static void pdgstrf2(superlu_options_t *, int_t, double, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, int *); #ifdef _CRAY static void pdgstrs2(int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, _fcd, _fcd, _fcd); #else static void pdgstrs2(int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *); #endif /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *
 *  PDGSTRF performs the LU factorization in parallel.
 *
 * Arguments
 * =========
 * 
 * options (input) superlu_options_t*
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following field should be defined:
 *         o ReplaceTinyPivot (yes_no_t)
 *           Specifies whether to replace the tiny diagonals by
 *           sqrt(epsilon)*norm(A) during LU factorization.
 *
 * m      (input) int
 *        Number of rows in the matrix.
 *
 * n      (input) int
 *        Number of columns in the matrix.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         The following fields should be defined:
 *
 *         o Glu_persist (input) Glu_persist_t*
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *	       xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (input/output) LocalLU_t*
 *           The distributed data structures to store L and U factors.
 *           See superlu_ddefs.h for the definition of 'LocalLU_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ int_t pdgstrf /************************************************************************/ ( superlu_options_t *options, int m, int n, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, int *info ) { #ifdef _CRAY _fcd ftcs = _cptofcd("N", strlen("N")); _fcd ftcs1 = _cptofcd("L", strlen("L")); _fcd ftcs2 = _cptofcd("N", strlen("N")); _fcd ftcs3 = _cptofcd("U", strlen("U")); #endif double alpha = 1.0, beta = 0.0; int_t *xsup; int_t *lsub, *lsub1, *usub, *Usub_buf, *Lsub_buf_2[2]; /* Need 2 buffers to implement Irecv. */ double *lusup, *lusup1, *uval, *Uval_buf, *Lval_buf_2[2]; /* Need 2 buffers to implement Irecv. */ int_t fnz, i, ib, ijb, ilst, it, iukp, jb, jj, klst, knsupc, lb, lib, ldv, ljb, lptr, lptr0, lptrj, luptr, luptr0, luptrj, nlb, nub, nsupc, rel, rukp; int_t Pc, Pr; int iam, kcol, krow, mycol, myrow, pi, pj; int j, k, lk, nsupers; int nsupr, nbrow, segsize; int msgcnt[4]; /* Count the size of the message xfer'd in each buffer: * 0 : transferred in Lsub_buf[] * 1 : transferred in Lval_buf[] * 2 : transferred in Usub_buf[] * 3 : transferred in Uval_buf[] */ int_t msg0, msg2; int_t **Ufstnz_br_ptr, **Lrowind_bc_ptr; double **Unzval_br_ptr, **Lnzval_bc_ptr; int_t *index; double *nzval; int_t *iuip, *ruip;/* Pointers to U index/nzval; size ceil(NSUPERS/Pr). */ double *ucol; int_t *indirect; double *tempv, *tempv2d; int_t iinfo; int_t *ToRecv, *ToSendD, **ToSendR; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; superlu_scope_t *scp; float s_eps; double thresh; double *tempU2d, *tempu; int full, ldt, ldu, lead_zero, ncols; MPI_Request recv_req[4], *send_req; MPI_Status status; #if ( DEBUGlevel>=2 ) int_t num_copy=0, num_update=0; #endif #if ( PRNTlevel==3 ) int_t zero_msg = 0, total_msg = 0; #endif #if ( PROFlevel>=1 ) double t1, t2; float msg_vol = 0, msg_cnt = 0; int_t iword = sizeof(int_t), dword = sizeof(double); #endif /* Test the input parameters. 
*/ *info = 0; if ( m < 0 ) *info = -2; else if ( n < 0 ) *info = -3; if ( *info ) { pxerbla("pdgstrf", grid, -*info); return (-1); } /* Quick return if possible. */ if ( m == 0 || n == 0 ) return 0; /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; s_eps = slamch_("Epsilon"); thresh = s_eps * anorm; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrf()"); #endif stat->ops[FACT] = 0.0; if ( Pr*Pc > 1 ) { i = Llu->bufmax[0]; if ( !(Llu->Lsub_buf_2[0] = intMalloc_dist(2 * ((size_t)i))) ) ABORT("Malloc fails for Lsub_buf."); Llu->Lsub_buf_2[1] = Llu->Lsub_buf_2[0] + i; i = Llu->bufmax[1]; if ( !(Llu->Lval_buf_2[0] = doubleMalloc_dist(2 * ((size_t)i))) ) ABORT("Malloc fails for Lval_buf[]."); Llu->Lval_buf_2[1] = Llu->Lval_buf_2[0] + i; if ( Llu->bufmax[2] != 0 ) if ( !(Llu->Usub_buf = intMalloc_dist(Llu->bufmax[2])) ) ABORT("Malloc fails for Usub_buf[]."); if ( Llu->bufmax[3] != 0 ) if ( !(Llu->Uval_buf = doubleMalloc_dist(Llu->bufmax[3])) ) ABORT("Malloc fails for Uval_buf[]."); if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*Pc*sizeof(MPI_Request)))) ABORT("Malloc fails for send_req[]."); } if ( !(Llu->ujrow = doubleMalloc_dist(sp_ienv_dist(3))) ) ABORT("Malloc fails for ujrow[]."); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. thresh = s_eps %e * anorm %e = %e\n", s_eps, anorm, thresh); printf(".. 
Buffer size: Lsub %d\tLval %d\tUsub %d\tUval %d\tLDA %d\n", Llu->bufmax[0], Llu->bufmax[1], Llu->bufmax[2], Llu->bufmax[3], Llu->bufmax[4]); } #endif Lsub_buf_2[0] = Llu->Lsub_buf_2[0]; Lsub_buf_2[1] = Llu->Lsub_buf_2[1]; Lval_buf_2[0] = Llu->Lval_buf_2[0]; Lval_buf_2[1] = Llu->Lval_buf_2[1]; Usub_buf = Llu->Usub_buf; Uval_buf = Llu->Uval_buf; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; ToRecv = Llu->ToRecv; ToSendD = Llu->ToSendD; ToSendR = Llu->ToSendR; ldt = sp_ienv_dist(3); /* Size of maximum supernode */ if ( !(tempv2d = doubleCalloc_dist(2*((size_t)ldt)*ldt)) ) ABORT("Calloc fails for tempv2d[]."); tempU2d = tempv2d + ldt*ldt; if ( !(indirect = intMalloc_dist(ldt)) ) ABORT("Malloc fails for indirect[]."); k = CEILING( nsupers, Pr ); /* Number of local block rows */ if ( !(iuip = intMalloc_dist(k)) ) ABORT("Malloc fails for iuip[]."); if ( !(ruip = intMalloc_dist(k)) ) ABORT("Malloc fails for ruip[]."); #if ( VAMPIR>=1 ) VT_symdef(1, "Send-L", "Comm"); VT_symdef(2, "Recv-L", "Comm"); VT_symdef(3, "Send-U", "Comm"); VT_symdef(4, "Recv-U", "Comm"); VT_symdef(5, "TRF2", "Factor"); VT_symdef(100, "Factor", "Factor"); VT_begin(100); VT_traceon(); #endif /* --------------------------------------------------------------- Handle the first block column separately to start the pipeline. --------------------------------------------------------------- */ if ( mycol == 0 ) { #if ( VAMPIR>=1 ) VT_begin(5); #endif pdgstrf2(options, 0, thresh, Glu_persist, grid, Llu, stat, info); #if ( VAMPIR>=1 ) VT_end(5); #endif scp = &grid->rscp; /* The scope of process row. */ /* Process column *kcol* multicasts numeric values of L(:,k) to process rows. 
*/ lsub = Lrowind_bc_ptr[0]; lusup = Lnzval_bc_ptr[0]; if ( lsub ) { msgcnt[0] = lsub[1] + BC_HEADER + lsub[0]*LB_DESCRIPTOR; msgcnt[1] = lsub[1] * SuperSize( 0 ); } else { msgcnt[0] = msgcnt[1] = 0; } for (pj = 0; pj < Pc; ++pj) { if ( ToSendR[0][pj] != EMPTY ) { #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(1); #endif MPI_Isend( lsub, msgcnt[0], mpi_int_t, pj, 0, scp->comm, &send_req[pj] ); MPI_Isend( lusup, msgcnt[1], MPI_DOUBLE, pj, 1, scp->comm, &send_req[pj+Pc] ); #if ( DEBUGlevel>=2 ) printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, 0, msgcnt[0], msgcnt[1], pj); #endif #if ( VAMPIR>=1 ) VT_end(1); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[0]*iword + msgcnt[1]*dword; #endif } } /* for pj ... */ } else { /* Post immediate receives. */ if ( ToRecv[0] >= 1 ) { /* Recv block column L(:,0). */ scp = &grid->rscp; /* The scope of process row. */ MPI_Irecv( Lsub_buf_2[0], Llu->bufmax[0], mpi_int_t, 0, 0, scp->comm, &recv_req[0] ); MPI_Irecv( Lval_buf_2[0], Llu->bufmax[1], MPI_DOUBLE, 0, 1, scp->comm, &recv_req[1] ); #if ( DEBUGlevel>=2 ) printf("(%d) Post Irecv L(:,%4d)\n", iam, 0); #endif } } /* if mycol == 0 */ /* ------------------------------------------ MAIN LOOP: Loop through all block columns. ------------------------------------------ */ for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( mycol == kcol ) { lk = LBj( k, grid ); /* Local block number. */ for (pj = 0; pj < Pc; ++pj) { /* Wait for Isend to complete before using lsub/lusup. */ if ( ToSendR[lk][pj] != EMPTY ) { MPI_Wait( &send_req[pj], &status ); MPI_Wait( &send_req[pj+Pc], &status ); } } lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; } else { if ( ToRecv[k] >= 1 ) { /* Recv block column L(:,k). */ scp = &grid->rscp; /* The scope of process row. 
*/ #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(2); #endif /*probe_recv(iam, kcol, (4*k)%NTAGS, mpi_int_t, scp->comm, Llu->bufmax[0]);*/ /*MPI_Recv( Lsub_buf, Llu->bufmax[0], mpi_int_t, kcol, (4*k)%NTAGS, scp->comm, &status );*/ MPI_Wait( &recv_req[0], &status ); MPI_Get_count( &status, mpi_int_t, &msgcnt[0] ); /*probe_recv(iam, kcol, (4*k+1)%NTAGS, MPI_DOUBLE, scp->comm, Llu->bufmax[1]);*/ /*MPI_Recv( Lval_buf, Llu->bufmax[1], MPI_DOUBLE, kcol, (4*k+1)%NTAGS, scp->comm, &status );*/ MPI_Wait( &recv_req[1], &status ); MPI_Get_count( &status, MPI_DOUBLE, &msgcnt[1] ); #if ( VAMPIR>=1 ) VT_end(2); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Recv L(:,%4d): lsub %4d, lusup %4d from Pc %2d\n", iam, k, msgcnt[0], msgcnt[1], kcol); fflush(stdout); #endif lsub = Lsub_buf_2[k%2]; lusup = Lval_buf_2[k%2]; #if ( PRNTlevel==3 ) ++total_msg; if ( !msgcnt[0] ) ++zero_msg; #endif } else msgcnt[0] = 0; } /* if mycol = Pc(k) */ scp = &grid->cscp; /* The scope of process column. */ if ( myrow == krow ) { /* Parallel triangular solve across process row *krow* -- U(k,j) = L(k,k) \ A(k,j). */ #ifdef _CRAY pdgstrs2(n, k, Glu_persist, grid, Llu, stat, ftcs1, ftcs2, ftcs3); #else pdgstrs2(n, k, Glu_persist, grid, Llu, stat); #endif /* Multicasts U(k,:) to process columns. 
*/ lk = LBi( k, grid ); usub = Ufstnz_br_ptr[lk]; uval = Unzval_br_ptr[lk]; if ( usub ) { msgcnt[2] = usub[2]; msgcnt[3] = usub[1]; } else { msgcnt[2] = msgcnt[3] = 0; } if ( ToSendD[lk] == YES ) { for (pi = 0; pi < Pr; ++pi) { if ( pi != myrow ) { #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(3); #endif MPI_Send( usub, msgcnt[2], mpi_int_t, pi, (4*k+2)%NTAGS, scp->comm); MPI_Send( uval, msgcnt[3], MPI_DOUBLE, pi, (4*k+3)%NTAGS, scp->comm); #if ( VAMPIR>=1 ) VT_end(3); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[2]*iword + msgcnt[3]*dword; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Send U(%4d,:) to Pr %2d\n", iam, k, pi); #endif } /* if pi ... */ } /* for pi ... */ } /* if ToSendD ... */ } else { /* myrow != krow */ if ( ToRecv[k] == 2 ) { /* Recv block row U(k,:). */ #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(4); #endif /*probe_recv(iam, krow, (4*k+2)%NTAGS, mpi_int_t, scp->comm, Llu->bufmax[2]);*/ MPI_Recv( Usub_buf, Llu->bufmax[2], mpi_int_t, krow, (4*k+2)%NTAGS, scp->comm, &status ); MPI_Get_count( &status, mpi_int_t, &msgcnt[2] ); /*probe_recv(iam, krow, (4*k+3)%NTAGS, MPI_DOUBLE, scp->comm, Llu->bufmax[3]);*/ MPI_Recv( Uval_buf, Llu->bufmax[3], MPI_DOUBLE, krow, (4*k+3)%NTAGS, scp->comm, &status ); MPI_Get_count( &status, MPI_DOUBLE, &msgcnt[3] ); #if ( VAMPIR>=1 ) VT_end(4); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; #endif usub = Usub_buf; uval = Uval_buf; #if ( DEBUGlevel>=2 ) printf("(%d) Recv U(%4d,:) from Pr %2d\n", iam, k, krow); #endif #if ( PRNTlevel==3 ) ++total_msg; if ( !msgcnt[2] ) ++zero_msg; #endif } else msgcnt[2] = 0; } /* if myrow == Pr(k) */ /* * Parallel rank-k update; pair up blocks L(i,k) and U(k,j). 
* for (j = k+1; k < N; ++k) { * for (i = k+1; i < N; ++i) * if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid ) * && L(i,k) != 0 && U(k,j) != 0 ) * A(i,j) = A(i,j) - L(i,k) * U(k,j); */ msg0 = msgcnt[0]; msg2 = msgcnt[2]; if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */ nsupr = lsub[1]; /* LDA of lusup. */ if ( myrow == krow ) { /* Skip diagonal block L(k,k). */ lptr0 = BC_HEADER + LB_DESCRIPTOR + lsub[BC_HEADER+1]; luptr0 = knsupc; nlb = lsub[0] - 1; } else { lptr0 = BC_HEADER; luptr0 = 0; nlb = lsub[0]; } lptr = lptr0; for (lb = 0; lb < nlb; ++lb) { /* Initialize block row pointers. */ ib = lsub[lptr]; lib = LBi( ib, grid ); iuip[lib] = BR_HEADER; ruip[lib] = 0; lptr += LB_DESCRIPTOR + lsub[lptr+1]; } nub = usub[0]; /* Number of blocks in the block row U(k,:) */ iukp = BR_HEADER; /* Skip header; Pointer to index[] of U(k,:) */ rukp = 0; /* Pointer to nzval[] of U(k,:) */ klst = FstBlockC( k+1 ); /* --------------------------------------------------- Update the first block column A(:,k+1). --------------------------------------------------- */ jb = usub[iukp]; /* Global block number of block U(k,j). */ if ( jb == k+1 ) { /* First update (k+1)-th block. */ --nub; lptr = lptr0; luptr = luptr0; ljb = LBj( jb, grid ); /* Local block number of U(k,j). */ nsupc = SuperSize( jb ); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ /* Prepare to call DGEMM. */ jj = iukp; while ( usub[jj] == klst ) ++jj; ldu = klst - usub[jj++]; ncols = 1; full = 1; for (; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { ++ncols; if ( segsize != ldu ) full = 0; if ( segsize > ldu ) ldu = segsize; } } #if ( DEBUGlevel>=3 ) ++num_update; #endif if ( full ) { tempu = &uval[rukp]; } else { /* Copy block U(k,j) into tempU2d. 
*/ #if ( DEBUGlevel>=3 ) printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n", iam, full, k, jb, ldu, ncols, nsupc); ++num_copy; #endif tempu = tempU2d; for (jj = iukp; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { lead_zero = ldu - segsize; for (i = 0; i < lead_zero; ++i) tempu[i] = 0.0; tempu += lead_zero; for (i = 0; i < segsize; ++i) tempu[i] = uval[rukp+i]; rukp += segsize; tempu += segsize; } } tempu = tempU2d; rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */ } /* if full ... */ for (lb = 0; lb < nlb; ++lb) { ib = lsub[lptr]; /* Row block L(i,k). */ nbrow = lsub[lptr+1]; /* Number of full rows. */ lptr += LB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; #ifdef _CRAY SGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #elif defined (USE_VENDOR_BLAS) dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt, 1, 1); #else dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #endif stat->ops[FACT] += 2 * nbrow * ldu * ncols; /* Now gather the result into the destination block. */ if ( ib < jb ) { /* A(i,j) is in U. */ ilst = FstBlockC( ib+1 ); lib = LBi( ib, grid ); index = Ufstnz_br_ptr[lib]; ijb = index[iuip[lib]]; while ( ijb < jb ) { /* Search for dest block. */ ruip[lib] += index[iuip[lib]+1]; iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb ); ijb = index[iuip[lib]]; } iuip[lib] += UB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0, it = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; ucol[rel] -= tempv[it++]; } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. 
*/ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! */ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted. */ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) { /*#pragma _CRI cache_bypass nzval,tempv*/ for (it = 0, i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; nzval[indirect[rel]] -= tempv[it++]; } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* if jb == k+1 */ } /* if L(:,k) and U(k,:) not empty */ if ( k+1 < nsupers ) { kcol = PCOL( k+1, grid ); if ( mycol == kcol ) { #if ( VAMPIR>=1 ) VT_begin(5); #endif /* Factor diagonal and subdiagonal blocks and test for exact singularity. */ pdgstrf2(options, k+1, thresh, Glu_persist, grid, Llu, stat, info); #if ( VAMPIR>=1 ) VT_end(5); #endif /* Process column *kcol+1* multicasts numeric values of L(:,k+1) to process rows. */ lk = LBj( k+1, grid ); /* Local block number. */ lsub1 = Lrowind_bc_ptr[lk]; if ( lsub1 ) { msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0]*LB_DESCRIPTOR; msgcnt[1] = lsub1[1] * SuperSize( k+1 ); } else { msgcnt[0] = 0; msgcnt[1] = 0; } scp = &grid->rscp; /* The scope of process row. 
*/ for (pj = 0; pj < Pc; ++pj) { if ( ToSendR[lk][pj] != EMPTY ) { lusup1 = Lnzval_bc_ptr[lk]; #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(1); #endif MPI_Isend( lsub1, msgcnt[0], mpi_int_t, pj, (4*(k+1))%NTAGS, scp->comm, &send_req[pj] ); MPI_Isend( lusup1, msgcnt[1], MPI_DOUBLE, pj, (4*(k+1)+1)%NTAGS, scp->comm, &send_req[pj+Pc] ); #if ( VAMPIR>=1 ) VT_end(1); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[0]*iword + msgcnt[1]*dword; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, k+1, msgcnt[0], msgcnt[1], pj); #endif } } /* for pj ... */ } else { /* Post Recv of block column L(:,k+1). */ if ( ToRecv[k+1] >= 1 ) { scp = &grid->rscp; /* The scope of process row. */ MPI_Irecv(Lsub_buf_2[(k+1)%2], Llu->bufmax[0], mpi_int_t, kcol, (4*(k+1))%NTAGS, scp->comm, &recv_req[0]); MPI_Irecv(Lval_buf_2[(k+1)%2], Llu->bufmax[1], MPI_DOUBLE, kcol, (4*(k+1)+1)%NTAGS, scp->comm, &recv_req[1]); #if ( DEBUGlevel>=2 ) printf("(%d) Post Irecv L(:,%4d)\n", iam, k+1); #endif } } /* if mycol == Pc(k+1) */ } /* if k+1 < nsupers */ if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */ /* --------------------------------------------------- Update all other blocks using block row U(k,:) --------------------------------------------------- */ for (j = 0; j < nub; ++j) { lptr = lptr0; luptr = luptr0; jb = usub[iukp]; /* Global block number of block U(k,j). */ ljb = LBj( jb, grid ); /* Local block number of U(k,j). */ nsupc = SuperSize( jb ); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ /* Prepare to call DGEMM. 
*/ jj = iukp; while ( usub[jj] == klst ) ++jj; ldu = klst - usub[jj++]; ncols = 1; full = 1; for (; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { ++ncols; if ( segsize != ldu ) full = 0; if ( segsize > ldu ) ldu = segsize; } } #if ( DEBUGlevel>=3 ) printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n", iam, full, k, jb, ldu, ncols, nsupc); ++num_update; #endif if ( full ) { tempu = &uval[rukp]; } else { /* Copy block U(k,j) into tempU2d. */ #if ( DEBUGlevel>=3 ) ++num_copy; #endif tempu = tempU2d; for (jj = iukp; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { lead_zero = ldu - segsize; for (i = 0; i < lead_zero; ++i) tempu[i] = 0.0; tempu += lead_zero; for (i = 0; i < segsize; ++i) tempu[i] = uval[rukp+i]; rukp += segsize; tempu += segsize; } } tempu = tempU2d; rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */ } /* if full ... */ for (lb = 0; lb < nlb; ++lb) { ib = lsub[lptr]; /* Row block L(i,k). */ nbrow = lsub[lptr+1]; /* Number of full rows. */ lptr += LB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; #ifdef _CRAY SGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #elif defined (USE_VENDOR_BLAS) dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt, 1, 1); #else dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #endif stat->ops[FACT] += 2 * nbrow * ldu * ncols; /* Now gather the result into the destination block. */ if ( ib < jb ) { /* A(i,j) is in U. */ ilst = FstBlockC( ib+1 ); lib = LBi( ib, grid ); index = Ufstnz_br_ptr[lib]; ijb = index[iuip[lib]]; while ( ijb < jb ) { /* Search for dest block. */ ruip[lib] += index[iuip[lib]+1]; iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb ); ijb = index[iuip[lib]]; } /* Skip descriptor. Now point to fstnz index of block U(i,j). 
*/ iuip[lib] += UB_DESCRIPTOR; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0 ; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; ucol[rel] -= tempv[i]; } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. */ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! */ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted for the L blocks. */ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) { /*#pragma _CRI cache_bypass nzval,tempv*/ for (i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; nzval[indirect[rel]] -= tempv[i]; } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* for j ... */ } /* if k L(:,k) and U(k,:) are not empty */ } /* ------------------------------------------ END MAIN LOOP: for k = ... 
------------------------------------------ */ #if ( VAMPIR>=1 ) VT_end(100); VT_traceoff(); #endif if ( Pr*Pc > 1 ) { SUPERLU_FREE(Lsub_buf_2[0]); /* also free Lsub_buf_2[1] */ SUPERLU_FREE(Lval_buf_2[0]); /* also free Lval_buf_2[1] */ if ( Llu->bufmax[2] != 0 ) SUPERLU_FREE(Usub_buf); if ( Llu->bufmax[3] != 0 ) SUPERLU_FREE(Uval_buf); SUPERLU_FREE(send_req); } SUPERLU_FREE(Llu->ujrow); SUPERLU_FREE(tempv2d); SUPERLU_FREE(indirect); SUPERLU_FREE(iuip); SUPERLU_FREE(ruip); /* Prepare error message. */ if ( *info == 0 ) *info = n + 1; #if ( PROFlevel>=1 ) TIC(t1); #endif MPI_Allreduce( info, &iinfo, 1, mpi_int_t, MPI_MIN, grid->comm ); #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; { float msg_vol_max, msg_vol_sum, msg_cnt_max, msg_cnt_sum; MPI_Reduce( &msg_cnt, &msg_cnt_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_cnt, &msg_cnt_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); if ( !iam ) { printf("\tPDGSTRF comm stat:" "\tAvg\tMax\t\tAvg\tMax\n" "\t\t\tCount:\t%.0f\t%.0f\tVol(MB)\t%.2f\t%.2f\n", msg_cnt_sum/Pr/Pc, msg_cnt_max, msg_vol_sum/Pr/Pc*1e-6, msg_vol_max*1e-6); } } #endif if ( iinfo == n + 1 ) *info = 0; else *info = iinfo; #if ( PRNTlevel==3 ) MPI_Allreduce( &zero_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. # msg of zero size\t%d\n", iinfo); MPI_Allreduce( &total_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. 
# total msg\t%d\n", iinfo); #endif #if ( DEBUGlevel>=2 ) for (i = 0; i < Pr * Pc; ++i) { if ( iam == i ) { dPrintLblocks(iam, nsupers, grid, Glu_persist, Llu); dPrintUblocks(iam, nsupers, grid, Glu_persist, Llu); printf("(%d)\n", iam); PrintInt10("Recv", nsupers, Llu->ToRecv); } MPI_Barrier( grid->comm ); } #endif #if ( DEBUGlevel>=3 ) printf("(%d) num_copy=%d, num_update=%d\n", iam, num_copy, num_update); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrf()"); #endif } /* PDGSTRF */ /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   Factor diagonal and subdiagonal blocks and test for exact singularity.
 *   Only the process column that owns block column *k* participates
 *   in the work.
 * 
 * Arguments
 * =========
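 *
 * options (input) superlu_options_t*
 *        Factorization options. This routine reads only
 *        options->ReplaceTinyPivot: when it is YES, a diagonal entry whose
 *        magnitude is below thresh is replaced by thresh carrying the
 *        entry's sign. An exactly zero diagonal entry is tested against
 *        thresh in the same way regardless of this option.
 *
 * k      (input) int_t (global)
 *        The global index of the block column to be factorized.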
 *
 * thresh (input) double (global)
 *        The threshold value = s_eps * anorm.
 *
 * Glu_persist (input) Glu_persist_t*
 *        Global data structures (xsup, supno) replicated on all processes.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Llu    (input/output) LocalLU_t*
 *        Local data structures to store distributed L and U matrices.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the factorization.
 *        See SuperLUStat_t structure defined in util.h.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ static void pdgstrf2 /************************************************************************/ ( superlu_options_t *options, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, int* info ) { int c, iam, l, pkk; int incx = 1, incy = 1; int nsupr; /* number of rows in the block (LDA) */ int luptr; int_t i, krow, j, jfst, jlst; int_t nsupc; /* number of columns in the block */ int_t *xsup = Glu_persist->xsup; double *lusup, temp; double *ujrow; double alpha = -1; *info = 0; /* Quick return. */ /* Initialization. */ iam = grid->iam; krow = PROW( k, grid ); pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); j = LBj( k, grid ); /* Local block number */ jfst = FstBlockC( k ); jlst = FstBlockC( k+1 ); lusup = Llu->Lnzval_bc_ptr[j]; nsupc = SuperSize( k ); if ( Llu->Lrowind_bc_ptr[j] ) nsupr = Llu->Lrowind_bc_ptr[j][1]; ujrow = Llu->ujrow; luptr = 0; /* Point to the diagonal entries. */ c = nsupc; for (j = 0; j < jlst - jfst; ++j) { /* Broadcast the j-th row (nsupc - j) elements to the process column. */ if ( iam == pkk ) { /* Diagonal process. */ i = luptr; if ( options->ReplaceTinyPivot == YES || lusup[i] == 0.0 ) { if ( fabs(lusup[i]) < thresh ) { /* Diagonal */ #if ( PRNTlevel>=2 ) printf("(%d) .. col %d, tiny pivot %e ", iam, jfst+j, lusup[i]); #endif /* Keep the replaced diagonal with the same sign. 
*/ if ( lusup[i] < 0 ) lusup[i] = -thresh; else lusup[i] = thresh; #if ( PRNTlevel>=2 ) printf("replaced by %e\n", lusup[i]); #endif ++(stat->TinyPivots); } } for (l = 0; l < c; ++l, i += nsupr) ujrow[l] = lusup[i]; } #if 0 dbcast_col(ujrow, c, pkk, UjROW, grid, &c); #else MPI_Bcast(ujrow, c, MPI_DOUBLE, krow, (grid->cscp).comm); /*bcast_tree(ujrow, c, MPI_DOUBLE, krow, (24*k+j)%NTAGS, grid, COMM_COLUMN, &c);*/ #endif #if ( DEBUGlevel>=2 ) if ( k == 3329 && j == 2 ) { if ( iam == pkk ) { printf("..(%d) k %d, j %d: Send ujrow[0] %e\n",iam,k,j,ujrow[0]); } else { printf("..(%d) k %d, j %d: Recv ujrow[0] %e\n",iam,k,j,ujrow[0]); } } #endif if ( !lusup ) { /* Empty block column. */ --c; if ( ujrow[0] == 0.0 ) *info = j+jfst+1; continue; } /* Test for singularity. */ if ( ujrow[0] == 0.0 ) { *info = j+jfst+1; } else { /* Scale the j-th column of the matrix. */ temp = 1.0 / ujrow[0]; if ( iam == pkk ) { for (i = luptr+1; i < luptr-j+nsupr; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupr-j-1; } else { for (i = luptr; i < luptr+nsupr; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupr; } } /* Rank-1 update of the trailing submatrix. */ if ( --c ) { if ( iam == pkk ) { l = nsupr - j - 1; #ifdef _CRAY SGER(&l, &c, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr); #else dger_(&l, &c, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr); #endif stat->ops[FACT] += 2 * l * c; } else { #ifdef _CRAY SGER(&nsupr, &c, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr); #else dger_(&nsupr, &c, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr); #endif stat->ops[FACT] += 2 * nsupr * c; } } /* Move to the next column. */ if ( iam == pkk ) luptr += nsupr + 1; else luptr += nsupr; } /* for j ... 
*/ } /* PDGSTRF2 */ /************************************************************************/ static void pdgstrs2 /************************************************************************/ #ifdef _CRAY ( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3 ) #else ( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat ) #endif /* * Purpose * ======= * Perform parallel triangular solves * U(k,:) := A(k,:) \ L(k,k). * Only the process row that owns block row *k* participates * in the work. * * Arguments * ========= * * m (input) int (global) * Number of rows in the matrix. * * k (input) int (global) * The row number of the block row to be factorized. * * Glu_persist (input) Glu_persist_t* * Global data structures (xsup, supno) replicated on all processes. * * grid (input) gridinfo_t* * The 2D process mesh. * * Llu (input/output) LocalLU_t* * Local data structures to store distributed L and U matrices. * * stat (output) SuperLUStat_t* * Record the statistics about the factorization; * See SuperLUStat_t structure defined in util.h. * */ { int iam, pkk; int incx = 1; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int segsize; int_t nsupc; /* number of columns in the block */ int_t luptr, iukp, rukp; int_t b, gb, j, klst, knsupc, lk, nb; int_t *xsup = Glu_persist->xsup; int_t *usub; double *lusup, *uval; /* Quick return. */ lk = LBi( k, grid ); /* Local block number */ if ( !Llu->Unzval_br_ptr[lk] ) return; /* Initialization. 
*/ iam = grid->iam; pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); klst = FstBlockC( k+1 ); knsupc = SuperSize( k ); usub = Llu->Ufstnz_br_ptr[lk]; /* index[] of block row U(k,:) */ uval = Llu->Unzval_br_ptr[lk]; nb = usub[0]; iukp = BR_HEADER; rukp = 0; if ( iam == pkk ) { lk = LBj( k, grid ); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; } else { nsupr = Llu->Lsub_buf_2[k%2][1]; /* LDA of lusup[] */ lusup = Llu->Lval_buf_2[k%2]; } /* Loop through all the row blocks. */ for (b = 0; b < nb; ++b) { gb = usub[iukp]; nsupc = SuperSize( gb ); iukp += UB_DESCRIPTOR; /* Loop through all the segments in the block. */ for (j = 0; j < nsupc; ++j) { segsize = klst - usub[iukp++]; if ( segsize ) { /* Nonzero segment. */ luptr = (knsupc - segsize) * (nsupr + 1); #ifdef _CRAY STRSV(ftcs1, ftcs2, ftcs3, &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #elif defined (USE_VENDOR_BLAS) dtrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx, 1, 1, 1); #else dtrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #endif stat->ops[FACT] += segsize * (segsize + 1); rukp += segsize; } } } /* for b ... */ } /* PDGSTRS2 */ static int probe_recv(int iam, int source, int tag, MPI_Datatype datatype, MPI_Comm comm, int buf_size) { MPI_Status status; int count; MPI_Probe( source, tag, comm, &status ); MPI_Get_count( &status, datatype, &count ); if ( count > buf_size ) { printf("(%d) Recv'ed count %d > buffer size $d\n", iam, count, buf_size); exit(-1); } return 0; } SuperLU_DIST_5.3.0/SRC/superlu_grid.c0000644013363400111340000001231313233431301016133 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief SuperLU grid utilities * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include "superlu_ddefs.h" /* Define global variables */ MPI_Datatype SuperLU_MPI_DOUBLE_COMPLEX = MPI_DATATYPE_NULL; /*! \brief All processes in the MPI communicator must call this routine. */ void superlu_gridinit(MPI_Comm Bcomm, /* The base communicator upon which the new grid is formed. */ int_t nprow, int_t npcol, gridinfo_t *grid) { int Np = nprow * npcol; int_t *usermap; int i, j, info; /* Make a list of the processes in the new communicator. */ usermap = (int_t *) SUPERLU_MALLOC(Np*sizeof(int_t)); for (j = 0; j < npcol; ++j) for (i = 0; i < nprow; ++i) usermap[j*nprow+i] = i*npcol+j; /* Check MPI environment initialization. */ MPI_Initialized( &info ); if ( !info ) ABORT("C main program must explicitly call MPI_Init()"); MPI_Comm_size( Bcomm, &info ); if ( info < Np ) ABORT("Number of processes is smaller than NPROW * NPCOL"); superlu_gridmap(Bcomm, nprow, npcol, usermap, nprow, grid); SUPERLU_FREE(usermap); } /*! \brief All processes in the MPI communicator must call this routine. */ void superlu_gridmap( MPI_Comm Bcomm, /* The base communicator upon which the new grid is formed. */ int_t nprow, int_t npcol, int_t usermap[], /* usermap(i,j) holds the process number to be placed in {i,j} of the process grid. */ int_t ldumap, /* The leading dimension of the 2D array usermap[]. */ gridinfo_t *grid) { MPI_Group mpi_base_group, superlu_grp; int Np = nprow * npcol, mycol, myrow; int *pranks; int i, j, info; /* Create datatype in C for MPI complex. */ if ( SuperLU_MPI_DOUBLE_COMPLEX == MPI_DATATYPE_NULL ) { MPI_Type_contiguous( 2, MPI_DOUBLE, &SuperLU_MPI_DOUBLE_COMPLEX ); MPI_Type_commit( &SuperLU_MPI_DOUBLE_COMPLEX ); } /* Check MPI environment initialization. */ MPI_Initialized( &info ); if ( !info ) ABORT("C main program must explicitly call MPI_Init()"); grid->nprow = nprow; grid->npcol = npcol; /* Make a list of the processes in the new communicator. 
*/ pranks = (int *) SUPERLU_MALLOC(Np*sizeof(int)); for (j = 0; j < npcol; ++j) for (i = 0; i < nprow; ++i) pranks[i*npcol+j] = usermap[j*ldumap+i]; /* * Form MPI communicator for all. */ /* Get the group underlying Bcomm. */ MPI_Comm_group( Bcomm, &mpi_base_group ); /* Create the new group. */ MPI_Group_incl( mpi_base_group, Np, pranks, &superlu_grp ); /* Create the new communicator. */ /* NOTE: The call is to be executed by all processes in Bcomm, even if they do not belong in the new group -- superlu_grp. */ MPI_Comm_create( Bcomm, superlu_grp, &grid->comm ); /* Bail out if I am not in the group, superlu_group. */ if ( grid->comm == MPI_COMM_NULL ) { grid->comm = Bcomm; MPI_Comm_rank( Bcomm, &i ); grid->iam = i; /*grid->iam = -1;*/ SUPERLU_FREE(pranks); return; } MPI_Comm_rank( grid->comm, &(grid->iam) ); myrow = grid->iam / npcol; mycol = grid->iam % npcol; /* * Form MPI communicator for myrow, scope = COMM_ROW. */ #if 0 for (i = 0; i < npcol; ++i) pranks[i] = myrow*npcol + i; MPI_Comm_group( grid->comm, &superlu_grp ); /* Find all's group */ MPI_Group_incl( superlu_grp, npcol, pranks, &grp ); /* Form new group */ MPI_Comm_create( grid->comm, grp, &grid->rscp.comm );/* Create new comm */ #else MPI_Comm_split(grid->comm, myrow, mycol, &(grid->rscp.comm)); #endif /* * Form MPI communicator for mycol, scope = COMM_COLUMN. */ #if 0 for (i = 0; i < nprow; ++i) pranks[i] = i*npcol + mycol; MPI_Group_incl( superlu_grp, nprow, pranks, &grp ); /* Form new group */ MPI_Comm_create( grid->comm, grp, &grid->cscp.comm );/* Create new comm */ #else MPI_Comm_split(grid->comm, mycol, myrow, &(grid->cscp.comm)); #endif grid->rscp.Np = npcol; grid->rscp.Iam = mycol; grid->cscp.Np = nprow; grid->cscp.Iam = myrow; #if 0 { int tag_ub; if ( !grid->iam ) { MPI_Attr_get(Bcomm, MPI_TAG_UB, &tag_ub, &info); printf("MPI_TAG_UB %d\n", tag_ub); /* returns 4295677672 In reality it is restricted to no greater than 16384. 
*/ } exit(0); } #endif SUPERLU_FREE(pranks); MPI_Group_free(&superlu_grp); MPI_Group_free(&mpi_base_group); } void superlu_gridexit(gridinfo_t *grid) { if ( grid->comm != MPI_COMM_NULL && grid->comm != MPI_COMM_WORLD ) { /* Marks the communicator objects for deallocation. */ MPI_Comm_free( &grid->rscp.comm ); MPI_Comm_free( &grid->cscp.comm ); MPI_Comm_free( &grid->comm ); } if ( SuperLU_MPI_DOUBLE_COMPLEX != MPI_DATATYPE_NULL ) { MPI_Type_free( &SuperLU_MPI_DOUBLE_COMPLEX ); } } SuperLU_DIST_5.3.0/SRC/superlu_dist_version.c0000644013363400111340000000151713233431301017722 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /** @file superlu_dist_version.h * \brief Gets the SuperLU_DIST's version information from the library. * * -- Distributed SuperLU routine (version 5.2) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * October 13, 2017 * */ #include "superlu_defs.h" int superlu_dist_GetVersionNumber(int *major, int *minor, int *bugfix) { if (major) *major = SUPERLU_DIST_MAJOR_VERSION; if (minor) *minor = SUPERLU_DIST_MINOR_VERSION; if (bugfix) *bugfix = SUPERLU_DIST_PATCH_VERSION; return 0; } SuperLU_DIST_5.3.0/SRC/zdistribute.c0000644013363400111340000006223713233431301016011 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Distribute the matrix onto the 2D process mesh. * *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 * 
*/ #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *   Distribute the matrix onto the 2D process mesh.
 * 
 * Arguments
 * =========
 * 
 * fact (input) fact_t
 *        Specifies whether or not the L and U structures will be re-used.
 *        = SamePattern_SameRowPerm: L and U structures are input, and
 *                                   unchanged on exit.
 *        = DOFACT or SamePattern: L and U structures are computed and output.
 *
 * n      (input) int
 *        Dimension of the matrix.
 *
 * A      (input) SuperMatrix*
 *	  The original matrix A, permuted by columns, of dimension
 *        (A->nrow, A->ncol). The type of A can be:
 *        Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
 *
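 * Glu_freeable (input) Glu_freeable_t*
 *        Compressed subscripts of L and U (lsub, xlsub, usub, xusub).
 *        Read only when fact is not SamePattern_SameRowPerm.
 *
 * LUstruct (input/output) LUstruct_t*
 *        Data structures for L and U factors. When the structures are
 *        created here (fact is not SamePattern_SameRowPerm), the fields
 *        of LUstruct->Llu are set on exit.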
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Return value
 * ============
 *   Working storage required (in bytes). The count is accumulated only
 *   when the code is compiled with PRNTlevel>=1; otherwise 0 is returned.
 * 
*/ /* Scatter the values of the column-permuted matrix A into the distributed L and U storage of this process. When fact == SamePattern_SameRowPerm the existing structures in LUstruct->Llu are refilled with the new values; otherwise the index/value arrays and the communication counts are built here and stored into Llu. */ float zdistribute(fact_t fact, int_t n, SuperMatrix *A, Glu_freeable_t *Glu_freeable, LUstruct_t *LUstruct, gridinfo_t *grid) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; int_t bnnz, fsupc, fsupc1, i, ii, irow, istart, j, jb, jj, k, len, len1, nsupc; int_t ljb; /* local block column number */ int_t nrbl; /* number of L blocks in current block column */ int_t nrbu; /* number of local block rows: CEILING(nsupers, nprow) */ int_t gb; /* global block number; 0 <= gb < nsupers */ int_t lb; /* local block number; 0 <= lb < ceil(NSUPERS/Pr) */ int iam, jbrow, kcol, mycol, myrow, pc, pr; int_t mybufmax[NBUFFERS]; NCPformat *Astore; doublecomplex *a; int_t *asub; int_t *xa_begin, *xa_end; int_t *xsup = Glu_persist->xsup; /* supernode and column mapping */ int_t *supno = Glu_persist->supno; int_t *lsub, *xlsub, *usub, *xusub; int_t nsupers; int_t next_lind; /* next available position in index[*] */ int_t next_lval; /* next available position in nzval[*] */ int_t *index; /* indices consist of headers and row subscripts */ int *index1; /* temporary pointer to array of int */ doublecomplex *lusup, *uval; /* nonzero values in L and U */ doublecomplex **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc) */ int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */ doublecomplex **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */ int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */ /*-- Counts to be used in factorization. --*/ int *ToRecv, *ToSendD, **ToSendR; /*-- Counts to be used in lower triangular solve. --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist; /* Column process list to send down Xk. */ int_t nfrecvx = 0; /* Number of Xk I will receive. */ int_t nfsendx = 0; /* Number of Xk I will send */ int_t kseen; /*-- Counts to be used in upper triangular solve. --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist; /* Column process list to send down Xk. */ int_t nbrecvx = 0; /* Number of Xk I will receive. 
*/ int_t nbsendx = 0; /* Number of Xk I will send */ int_t *ilsum; /* starting position of each supernode in the full array (local) */ /*-- Auxiliary arrays; freed on return --*/ int_t *rb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ int_t *Urb_length; /* U block length; size ceil(NSUPERS/Pr) */ int_t *Urb_indptr; /* pointers to U index[]; size ceil(NSUPERS/Pr) */ int_t *Urb_fstnz; /* # of fstnz in a block row; size ceil(NSUPERS/Pr) */ int_t *Ucbs; /* number of column blocks in a block row */ int_t *Lrb_length; /* L block length; size ceil(NSUPERS/Pr) */ int_t *Lrb_number; /* global block number; size ceil(NSUPERS/Pr) */ int_t *Lrb_indptr; /* pointers to L index[]; size ceil(NSUPERS/Pr) */ int_t *Lrb_valptr; /* pointers to L nzval[]; size ceil(NSUPERS/Pr) */ doublecomplex *dense, *dense_col; /* SPA */ doublecomplex zero = {0.0, 0.0}; int_t ldaspa; /* LDA of SPA */ int_t iword, zword; float mem_use = 0.0; #if ( PRNTlevel>=1 ) int_t nLblocks = 0, nUblocks = 0; #endif #if ( PROFlevel>=1 ) double t, t_u, t_l; int_t u_blks; #endif /* Initialization. */ iam = grid->iam; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0; nsupers = supno[n-1] + 1; Astore = A->Store; a = Astore->nzval; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; #if ( PRNTlevel>=1 ) iword = sizeof(int_t); zword = sizeof(doublecomplex); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter zdistribute()"); #endif if ( fact == SamePattern_SameRowPerm ) { /* --------------------------------------------------------------- * REUSE THE L AND U DATA STRUCTURES FROM A PREVIOUS FACTORIZATION. * --------------------------------------------------------------- */ #if ( PROFlevel>=1 ) t_l = t_u = 0; u_blks = 0; #endif /* We can propagate the new values of A into the existing L and U data structures. 
*/ ilsum = Llu->ilsum; ldaspa = Llu->ldalsum; if ( !(dense = doublecomplexCalloc_dist(((size_t)ldaspa) * sp_ienv_dist(3))) ) ABORT("Calloc fails for SPA dense[]."); nrbu = CEILING( nsupers, grid->nprow ); /* No. of local block rows */ if ( !(Urb_length = intCalloc_dist(nrbu)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(nrbu)) ) ABORT("Malloc fails for Urb_indptr[]."); Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; #if ( PRNTlevel>=1 ) mem_use += 2.0*nrbu*iword + ldaspa*sp_ienv_dist(3)*zword; #endif #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /* Initialize Uval to zero. */ for (lb = 0; lb < nrbu; ++lb) { Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ index = Ufstnz_br_ptr[lb]; if ( index ) { uval = Unzval_br_ptr[lb]; len = index[1]; for (i = 0; i < len; ++i) uval[i] = zero; } /* if index != NULL */ } /* for lb ... */ for (jb = 0; jb < nsupers; ++jb) { /* Loop through each block column */ pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Scatter A into SPA (for L), or into U directly. 
*/ for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) { for (i = xa_begin[j]; i < xa_end[j]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); if ( gb < jb ) { /* in U */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; while ( (k = index[Urb_indptr[lb]]) < jb ) { /* Skip nonzero values in this block */ Urb_length[lb] += index[Urb_indptr[lb]+1]; /* Move pointer to the next block */ Urb_indptr[lb] += UB_DESCRIPTOR + SuperSize( k ); } /*assert(k == jb);*/ /* start fstnz */ istart = Urb_indptr[lb] + UB_DESCRIPTOR; len = Urb_length[lb]; fsupc1 = FstBlockC( gb+1 ); k = j - fsupc; /* Sum the lengths of the leading columns */ for (jj = 0; jj < k; ++jj) len += fsupc1 - index[istart++]; /*assert(irow>=index[istart]);*/ uval[len + irow - index[istart]] = a[i]; } else { /* in L; put in SPA first */ irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } } /* for i ... */ dense_col += ldaspa; } /* for j ... */ #if ( PROFlevel>=1 ) t_u += SuperLU_timer_() - t; t = SuperLU_timer_(); #endif /* Gather the values of A from SPA into Lnzval[]. */ ljb = LBj( jb, grid ); /* Local block number */ index = Lrowind_bc_ptr[ljb]; if ( index ) { nrbl = index[0]; /* Number of row blocks. */ len = index[1]; /* LDA of lusup[]. */ lusup = Lnzval_bc_ptr[ljb]; next_lind = BC_HEADER; next_lval = 0; for (jj = 0; jj < nrbl; ++jj) { gb = index[next_lind++]; len1 = index[next_lind++]; /* Rows in the block. */ lb = LBi( gb, grid ); for (bnnz = 0; bnnz < len1; ++bnnz) { irow = index[next_lind++]; /* Global index. */ irow = ilsum[lb] + irow - FstBlockC( gb ); k = next_lval++; for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } /* for bnnz ... */ } /* for jj ... */ } /* if index ... */ #if ( PROFlevel>=1 ) t_l += SuperLU_timer_() - t; #endif } /* if mycol == pc */ } /* for jb ... 
*/ SUPERLU_FREE(dense); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); #if ( PROFlevel>=1 ) /* NOTE(review): u_blks and nrbu are int_t but are printed with %d; other printfs in this function use IFMT for int_t. */ if ( !iam ) printf(".. 2nd distribute time: L %.2f\tU %.2f\tu_blks %d\tnrbu %d\n", t_l, t_u, u_blks, nrbu); #endif } else { /* -------------------------------------------------- * FIRST TIME CREATING THE L AND U DATA STRUCTURE. * -------------------------------------------------- */ #if ( PROFlevel>=1 ) t_l = t_u = 0; u_blks = 0; #endif /* No L and U data structures are available yet. We need to set up the L and U data structures and propagate the values of A into them. */ lsub = Glu_freeable->lsub; /* compressed L subscripts */ xlsub = Glu_freeable->xlsub; usub = Glu_freeable->usub; /* compressed U subscripts */ xusub = Glu_freeable->xusub; if ( !(ToRecv = SUPERLU_MALLOC(nsupers * sizeof(int))) ) ABORT("Malloc fails for ToRecv[]."); for (i = 0; i < nsupers; ++i) ToRecv[i] = 0; k = CEILING( nsupers, grid->npcol );/* Number of local column blocks */ if ( !(ToSendR = (int **) SUPERLU_MALLOC(k*sizeof(int*))) ) ABORT("Malloc fails for ToSendR[]."); j = k * grid->npcol; if ( !(index1 = SUPERLU_MALLOC(j * sizeof(int))) ) ABORT("Malloc fails for index[]."); #if ( PRNTlevel>=1 ) mem_use += (float) k*sizeof(int_t*) + (j + nsupers)*iword; #endif for (i = 0; i < j; ++i) index1[i] = EMPTY; for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index1[j]; k = CEILING( nsupers, grid->nprow ); /* Number of local block rows */ /* Pointers to the beginning of each block row of U. 
*/ if ( !(Unzval_br_ptr = (doublecomplex**)SUPERLU_MALLOC(k * sizeof(doublecomplex*))) ) ABORT("Malloc fails for Unzval_br_ptr[]."); if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Ufstnz_br_ptr[]."); if ( !(ToSendD = SUPERLU_MALLOC(k * sizeof(int))) ) ABORT("Malloc fails for ToSendD[]."); for (i = 0; i < k; ++i) ToSendD[i] = NO; if ( !(ilsum = intMalloc_dist(k+1)) ) ABORT("Malloc fails for ilsum[]."); /* Auxiliary arrays used to set up U block data structures. They are freed on return. */ if ( !(rb_marker = intCalloc_dist(k)) ) ABORT("Calloc fails for rb_marker[]."); if ( !(Urb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Urb_indptr[]."); if ( !(Urb_fstnz = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_fstnz[]."); if ( !(Ucbs = intCalloc_dist(k)) ) ABORT("Calloc fails for Ucbs[]."); #if ( PRNTlevel>=1 ) mem_use += 2.0*k*sizeof(int_t*) + (7.0*k+1)*iword; #endif /* Compute ldaspa and ilsum[]. */ ldaspa = 0; ilsum[0] = 0; for (gb = 0; gb < nsupers; ++gb) { if ( myrow == PROW( gb, grid ) ) { i = SuperSize( gb ); ldaspa += i; lb = LBi( gb, grid ); ilsum[lb + 1] = ilsum[lb] + i; } } /* ------------------------------------------------------------ COUNT NUMBER OF ROW BLOCKS AND THE LENGTH OF EACH BLOCK IN U. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF G(U). ------------------------------------------------------------*/ /* Loop through each supernode column. */ for (jb = 0; jb < nsupers; ++jb) { pc = PCOL( jb, grid ); fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Loop through each column in the block. */ for (j = fsupc; j < fsupc + nsupc; ++j) { /* usub[*] contains only "first nonzero" in each segment. */ for (i = xusub[j]; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero of the segment. 
*/ gb = BlockNum( irow ); kcol = PCOL( gb, grid ); ljb = LBj( gb, grid ); if ( mycol == kcol && mycol != pc ) ToSendR[ljb][pc] = YES; pr = PROW( gb, grid ); lb = LBi( gb, grid ); if ( mycol == pc ) { if ( myrow == pr ) { ToSendD[lb] = YES; /* Count nonzeros in entire block row. */ Urb_length[lb] += FstBlockC( gb+1 ) - irow; if (rb_marker[lb] <= jb) {/* First see the block: the marker stores jb+1 for the last block column seen */ rb_marker[lb] = jb + 1; Urb_fstnz[lb] += nsupc; ++Ucbs[lb]; /* Number of column blocks in block row lb. */ #if ( PRNTlevel>=1 ) ++nUblocks; #endif } ToRecv[gb] = 1; } else ToRecv[gb] = 2; /* Do I need 0, 1, 2 ? */ } } /* for i ... */ } /* for j ... */ } /* for jb ... */ /* Set up the initial pointers for each block row in U. */ nrbu = CEILING( nsupers, grid->nprow );/* Number of local block rows */ for (lb = 0; lb < nrbu; ++lb) { len = Urb_length[lb]; rb_marker[lb] = 0; /* Reset block marker. */ if ( len ) { /* Add room for descriptors */ len1 = Urb_fstnz[lb] + BR_HEADER + Ucbs[lb] * UB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1+1)) ) ABORT("Malloc fails for Uindex[]."); Ufstnz_br_ptr[lb] = index; if ( !(Unzval_br_ptr[lb] = doublecomplexMalloc_dist(len)) ) ABORT("Malloc fails for Unzval_br_ptr[*][]."); mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 ); mybufmax[3] = SUPERLU_MAX( mybufmax[3], len ); index[0] = Ucbs[lb]; /* Number of column blocks */ index[1] = len; /* Total length of nzval[] */ index[2] = len1; /* Total length of index[] */ index[len1] = -1; /* End marker */ } else { Ufstnz_br_ptr[lb] = NULL; Unzval_br_ptr[lb] = NULL; } Urb_length[lb] = 0; /* Reset block length. */ Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ Urb_fstnz[lb] = BR_HEADER; } /* for lb ... */ SUPERLU_FREE(Ucbs); #if ( PROFlevel>=1 ) /* NOTE(review): t is not assigned earlier on this (first-time) path, so this difference reads an unset value -- confirm. */ t = SuperLU_timer_() - t; if ( !iam) printf(".. 
Phase 2 - setup U strut time: %.2f\t\n", t); #endif #if ( PRNTlevel>=1 ) mem_use -= 2.0*k * iword; #endif /* Auxiliary arrays used to set up L block data structures. They are freed on return. 
k is the number of local row blocks. */ if ( !(Lrb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Lrb_length[]."); if ( !(Lrb_number = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_number[]."); if ( !(Lrb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_indptr[]."); if ( !(Lrb_valptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_valptr[]."); if (!(dense=doublecomplexCalloc_dist(SUPERLU_MAX(1,((size_t)ldaspa) *sp_ienv_dist(3))))) ABORT("Calloc fails for SPA dense[]."); /* These counts will be used for triangular solves. */ if ( !(fmod = intCalloc_dist(k)) ) ABORT("Calloc fails for fmod[]."); if ( !(bmod = intCalloc_dist(k)) ) ABORT("Calloc fails for bmod[]."); #if ( PRNTlevel>=1 ) mem_use += 6.0*k*iword + ldaspa*sp_ienv_dist(3)*zword; #endif k = CEILING( nsupers, grid->npcol );/* Number of local block columns */ /* Pointers to the beginning of each block column of L. */ if ( !(Lnzval_bc_ptr = (doublecomplex**)SUPERLU_MALLOC(k * sizeof(doublecomplex*))) ) ABORT("Malloc fails for Lnzval_bc_ptr[]."); if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Lrowind_bc_ptr[]."); Lrowind_bc_ptr[k-1] = NULL; /* NOTE(review): only the last slot is pre-set here; presumably it is the one slot the jb loop below may never write -- confirm. */ /* These lists of processes will be used for triangular solves. 
*/ if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for fsendx_plist[]."); len = k * grid->nprow; if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for fsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) fsendx_plist[i] = &index[j]; if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for bsendx_plist[]."); if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for bsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) bsendx_plist[i] = &index[j]; #if ( PRNTlevel>=1 ) mem_use += 4.0*k*sizeof(int_t*) + 2.0*len*iword; #endif /*------------------------------------------------------------ PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U. ------------------------------------------------------------*/ for (jb = 0; jb < nsupers; ++jb) { pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); ljb = LBj( jb, grid ); /* Local block number */ /* Scatter A into SPA. */ for (j = fsupc, dense_col = dense; j < FstBlockC( jb+1 ); ++j){ for (i = xa_begin[j]; i < xa_end[j]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } dense_col += ldaspa; } jbrow = PROW( jb, grid ); #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /*------------------------------------------------ * SET UP U BLOCKS. *------------------------------------------------*/ kseen = 0; dense_col = dense; /* Loop through each column in the block column. */ for (j = fsupc; j < FstBlockC( jb+1 ); ++j) { istart = xusub[j]; /* NOTE: Only the first nonzero index of the segment is stored in usub[]. 
*/ for (i = istart; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero in the segment. */ gb = BlockNum( irow ); pr = PROW( gb, grid ); if ( pr != jbrow && myrow == jbrow && /* diag. proc. owning jb */ bsendx_plist[ljb][pr] == EMPTY ) { bsendx_plist[ljb][pr] = YES; ++nbsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; fsupc1 = FstBlockC( gb+1 ); if (rb_marker[lb] <= jb) { /* First time see the block */ rb_marker[lb] = jb + 1; Urb_indptr[lb] = Urb_fstnz[lb];; index[Urb_indptr[lb]] = jb; /* Descriptor */ Urb_indptr[lb] += UB_DESCRIPTOR; /* Record the first location in index[] of the next block */ Urb_fstnz[lb] = Urb_indptr[lb] + nsupc; len = Urb_indptr[lb];/* Start fstnz in index */ index[len-1] = 0; for (k = 0; k < nsupc; ++k) index[len+k] = fsupc1; if ( gb != jb )/* Exclude diagonal block. */ ++bmod[lb];/* Mod. count for back solve */ if ( kseen == 0 && myrow != jbrow ) { ++nbrecvx; kseen = 1; } } else { /* Already saw the block */ len = Urb_indptr[lb];/* Start fstnz in index */ } jj = j - fsupc; index[len+jj] = irow; /* Load the numerical values */ k = fsupc1 - irow; /* No. of nonzeros in segment */ index[len-1] += k; /* Increment block length in Descriptor */ irow = ilsum[lb] + irow - FstBlockC( gb ); for (ii = 0; ii < k; ++ii) { uval[Urb_length[lb]++] = dense_col[irow + ii]; dense_col[irow + ii] = zero; } } /* if myrow == pr ... */ } /* for i ... */ dense_col += ldaspa; } /* for j ... */ #if ( PROFlevel>=1 ) t_u += SuperLU_timer_() - t; t = SuperLU_timer_(); #endif /*------------------------------------------------ * SET UP L BLOCKS. *------------------------------------------------*/ /* Count number of blocks and length of each block. */ nrbl = 0; len = 0; /* Number of row subscripts I own. 
*/ kseen = 0; istart = xlsub[fsupc]; for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); /* Global block number */ pr = PROW( gb, grid ); /* Process row owning this block */ if ( pr != jbrow && myrow == jbrow && /* diag. proc. owning jb */ fsendx_plist[ljb][pr] == EMPTY /* first time */ ) { fsendx_plist[ljb][pr] = YES; ++nfsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ if (rb_marker[lb] <= jb) { /* First see this block */ rb_marker[lb] = jb + 1; Lrb_length[lb] = 1; Lrb_number[nrbl++] = gb; if ( gb != jb ) /* Exclude diagonal block. */ ++fmod[lb]; /* Mod. count for forward solve */ if ( kseen == 0 && myrow != jbrow ) { ++nfrecvx; kseen = 1; } #if ( PRNTlevel>=1 ) ++nLblocks; #endif } else { ++Lrb_length[lb]; } ++len; } } /* for i ... */ if ( nrbl ) { /* Do not ensure the blocks are sorted! */ /* Set up the initial pointers for each block in index[] and nzval[]. */ /* Add room for descriptors */ len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1)) ) ABORT("Malloc fails for index[]"); Lrowind_bc_ptr[ljb] = index; if (!(Lnzval_bc_ptr[ljb] = doublecomplexMalloc_dist(((size_t)len)*nsupc))) { fprintf(stderr, "col block " IFMT " ", jb); ABORT("Malloc fails for Lnzval_bc_ptr[*][]"); } mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 ); mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc ); mybufmax[4] = SUPERLU_MAX( mybufmax[4], len ); index[0] = nrbl; /* Number of row blocks */ index[1] = len; /* LDA of the nzval[] */ next_lind = BC_HEADER; next_lval = 0; for (k = 0; k < nrbl; ++k) { gb = Lrb_number[k]; lb = LBi( gb, grid ); len = Lrb_length[lb]; Lrb_length[lb] = 0; /* Reset vector of block length */ index[next_lind++] = gb; /* Descriptor */ index[next_lind++] = len; Lrb_indptr[lb] = next_lind; Lrb_valptr[lb] = next_lval; next_lind += len; next_lval += len; } /* Propagate the compressed row subscripts to Lindex[], and the initial values of A from SPA into Lnzval[]. 
*/ lusup = Lnzval_bc_ptr[ljb]; len = index[1]; /* LDA of lusup[] */ for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); k = Lrb_indptr[lb]++; /* Random access a block */ index[k] = irow; k = Lrb_valptr[lb]++; irow = ilsum[lb] + irow - FstBlockC( gb ); for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } } /* for i ... */ } else { Lrowind_bc_ptr[ljb] = NULL; Lnzval_bc_ptr[ljb] = NULL; } /* if nrbl ... */ #if ( PROFlevel>=1 ) t_l += SuperLU_timer_() - t; #endif } /* if mycol == pc */ } /* for jb ... */ Llu->Lrowind_bc_ptr = Lrowind_bc_ptr; Llu->Lnzval_bc_ptr = Lnzval_bc_ptr; Llu->Ufstnz_br_ptr = Ufstnz_br_ptr; Llu->Unzval_br_ptr = Unzval_br_ptr; Llu->ToRecv = ToRecv; Llu->ToSendD = ToSendD; Llu->ToSendR = ToSendR; Llu->fmod = fmod; Llu->fsendx_plist = fsendx_plist; Llu->nfrecvx = nfrecvx; Llu->nfsendx = nfsendx; Llu->bmod = bmod; Llu->bsendx_plist = bsendx_plist; Llu->nbrecvx = nbrecvx; Llu->nbsendx = nbsendx; Llu->ilsum = ilsum; Llu->ldalsum = ldaspa; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. # L blocks " IFMT "\t# U blocks " IFMT "\n", nLblocks, nUblocks); #endif SUPERLU_FREE(rb_marker); SUPERLU_FREE(Urb_fstnz); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); SUPERLU_FREE(Lrb_length); SUPERLU_FREE(Lrb_number); SUPERLU_FREE(Lrb_indptr); SUPERLU_FREE(Lrb_valptr); SUPERLU_FREE(dense); k = CEILING( nsupers, grid->nprow );/* Number of local block rows */ if ( !(Llu->mod_bit = intMalloc_dist(k)) ) ABORT("Malloc fails for mod_bit[]."); /* Find the maximum buffer size. */ MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, MPI_MAX, grid->comm); #if ( PROFlevel>=1 ) if ( !iam ) printf(".. 
1st distribute time:\n " "\tL\t%.2f\n\tU\t%.2f\n" "\tu_blks %d\tnrbu %d\n--------\n", t_l, t_u, u_blks, nrbu); #endif } /* else fact != SamePattern_SameRowPerm */ #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ilsum, fmod, fsendx_plist, bmod, bsendx_plist */ CHECK_MALLOC(iam, "Exit zdistribute()"); #endif /* mem_use is only updated inside PRNTlevel>=1 sections, so without that setting the value returned here is still the initial 0.0. */ return (mem_use); } /* ZDISTRIBUTE */ SuperLU_DIST_5.3.0/SRC/pzgsrfs.c0000644013363400111340000002007513233431301015131 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Improves the computed solution to a system of linear equations and provides error bounds and backward error estimates * *
 * -- Distributed SuperLU routine (version 4.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 * Last modified:
 * December 31, 2015
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * PZGSRFS improves the computed solution to a system of linear   
 * equations and provides error bounds and backward error estimates
 * for the solution. 
 *
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * A      (input) SuperMatrix*
 *	  The original matrix A, or the scaled A if equilibration was done.
 *        A is also permuted into diag(R)*A*diag(C)*Pc'. The type of A can be:
 *        Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pzgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_zdefs.h for the definition of 'LUstruct_t'.
 *
 * ScalePermstruct (input) ScalePermstruct_t* (global)
 *         The data structure to store the scaling and permutation vectors
 *         describing the transformations performed to the matrix A.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input) doublecomplex* (local)
 *        The m_loc-by-NRHS right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *       
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * X      (input/output) doublecomplex* (local)
 *        On entry, the solution matrix Y, as computed by PZGSTRS, of the
 *            transformed system A1*Y = Pc*Pr*B. where
 *            A1 = Pc*Pr*diag(R)*A*diag(C)*Pc' and Y = Pc*diag(C)^(-1)*X.
 *        On exit, the improved solution matrix Y.
 *
 *        In order to obtain the solution X to the original system,
 *        Y should be permuted by Pc^T, and premultiplied by diag(C)
 *        if DiagScale = COL or BOTH.
 *        This must be done after this routine is called.
 *
 * ldx    (input) int (local)
 *        Leading dimension of matrix X.
 *
 * nrhs   (input) int
 *        Number of right-hand sides.
 *
 * SOLVEstruct (output) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * berr   (output) double*, dimension (nrhs)
 *         The componentwise relative backward error of each solution   
 *         vector X(j) (i.e., the smallest relative change in   
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the refinement steps.
 *        See util.h for the definition of SuperLUStat_t.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        
 * Internal Parameters   
 * ===================   
 *
 * ITMAX is the maximum number of steps of iterative refinement.   
 * 
*/ void pzgsrfs(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct, ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid, doublecomplex *B, int_t ldb, doublecomplex *X, int_t ldx, int nrhs, SOLVEstruct_t *SOLVEstruct, double *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 20 Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; doublecomplex *ax, *R, *dx, *temp, *work, *B_col, *X_col; double *rtemp; int_t count, i, j, lwork, nz; int iam; double eps, lstres; double s, safmin, safe1, safe2; /* Data structures used by matrix-vector multiply routine. */ pzgsmv_comm_t *gsmv_comm = SOLVEstruct->gsmv_comm; NRformat_loc *Astore; int_t m_loc, fst_row; /* Initialization. */ Astore = (NRformat_loc *) A->Store; m_loc = Astore->m_loc; fst_row = Astore->fst_row; iam = grid->iam; /* Test the input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc || A->Dtype != SLU_Z || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < SUPERLU_MAX(0, m_loc) ) *info = -10; else if ( ldx < SUPERLU_MAX(0, m_loc) ) *info = -12; else if ( nrhs < 0 ) *info = -13; if (*info != 0) { i = -(*info); pxerr_dist("PZGSRFS", grid, i); return; } /* Quick return if possible. */ if ( n == 0 || nrhs == 0 ) { return; } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgsrfs()"); #endif lwork = 2 * m_loc; /* For ax/R/dx and temp */ if ( !(work = doublecomplexMalloc_dist(lwork)) ) ABORT("Malloc fails for work[]"); ax = R = dx = work; temp = ax + m_loc; rtemp = (double *) temp; /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = dmach_dist("Epsilon"); safmin = dmach_dist("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. */ safe1 = nz * safmin; safe2 = safe1 / eps; #if ( DEBUGlevel>=1 ) if ( !iam ) printf(".. 
eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n", eps, anorm, safe1, safe2); #endif /* Do for each right-hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; B_col = &B[j*ldb]; X_col = &X[j*ldx]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ /* Matrix-vector multiply. */ pzgsmv(0, A, grid, gsmv_comm, X_col, ax); /* Compute residual, stored in R[]. */ for (i = 0; i < m_loc; ++i) z_sub(&R[i], &B_col[i], &ax[i]); /* Compute abs(op(A))*abs(X) + abs(B), stored in temp[]. */ pzgsmv(1, A, grid, gsmv_comm, X_col, temp); /* NOTE: rtemp is aliased to temp */ for (i = 0; i < m_loc; ++i) rtemp[i] += slud_z_abs1(&B_col[i]); s = 0.0; for (i = 0; i < m_loc; ++i) { if ( rtemp[i] > safe2 ) { s = SUPERLU_MAX(s, slud_z_abs1(&R[i]) / rtemp[i]); } else if ( rtemp[i] != 0.0 ) { s = SUPERLU_MAX(s, (safe1 + slud_z_abs1(&R[i])) / rtemp[i]); } /* If temp[i] is exactly 0.0 (computed by PxGSMV), then we know the true residual also must be exactly 0.0. */ } MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm ); #if ( PRNTlevel>= 1 ) if ( !iam ) printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]); #endif if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) { /* Compute new dx. */ pzgstrs(n, LUstruct, ScalePermstruct, grid, dx, m_loc, fst_row, m_loc, 1, SOLVEstruct, stat, info); /* Update solution. */ for (i = 0; i < m_loc; ++i) z_add(&X_col[i], &X_col[i], &dx[i]); lstres = berr[j]; ++count; } else { break; } } /* end while */ stat->RefineSteps = count; } /* for j ... */ /* Deallocate storage. */ SUPERLU_FREE(work); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgsrfs()"); #endif } /* PZGSRFS */ SuperLU_DIST_5.3.0/SRC/pzgssvx.c0000644013363400111340000016102513233431301015160 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of linear equations A*X=B * *
 * -- Distributed SuperLU routine (version 5.1.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * November 1, 2007
 * October 22, 2012
 * October  1, 2014
 * April 5, 2015
 * December 31, 2015  version 4.3
 * December 31, 2016  version 5.1.3
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * PZGSSVX solves a system of linear equations A*X=B,
 * by using Gaussian elimination with "static pivoting" to
 * compute the LU factorization of A.
 *
 * Static pivoting is a technique that combines the numerical stability
 * of partial pivoting with the scalability of Cholesky (no pivoting),
 * to run accurately and efficiently on large numbers of processors.
 * See our paper at http://www.nersc.gov/~xiaoye/SuperLU/ for a detailed
 * description of the parallel algorithms.
 *
 * The input matrices A and B are distributed by block rows.
 * Here is a graphical illustration (0-based indexing):
 *
 *                        A                B
 *               0 ---------------       ------
 *                   |           |        |  |
 *                   |           |   P0   |  |
 *                   |           |        |  |
 *                 ---------------       ------
 *        - fst_row->|           |        |  |
 *        |          |           |        |  |
 *       m_loc       |           |   P1   |  |
 *        |          |           |        |  |
 *        -          |           |        |  |
 *                 ---------------       ------
 *                   |    .      |        |. |
 *                   |    .      |        |. |
 *                   |    .      |        |. |
 *                 ---------------       ------
 * 
 * where, fst_row is the row number of the first row,
 *        m_loc is the number of rows local to this processor
 * These are defined in the 'SuperMatrix' structure, see supermatrix.h.
 *
 *
 * Here are the options for using this code:
 *
 *   1. Independent of all the other options specified below, the
 *      user must supply
 *
 *      -  B, the matrix of right-hand sides, distributed by block rows,
 *            and its dimensions ldb (local) and nrhs (global)
 *      -  grid, a structure describing the 2D processor mesh
 *      -  options->IterRefine, which determines whether or not to
 *            improve the accuracy of the computed solution using 
 *            iterative refinement
 *
 *      On output, B is overwritten with the solution X.
 *
 *   2. Depending on options->Fact, the user has four options
 *      for solving A*X=B. The standard option is for factoring
 *      A "from scratch". (The other options, described below,
 *      are used when A is sufficiently similar to a previously 
 *      solved problem to save time by reusing part or all of 
 *      the previous factorization.)
 *
 *      -  options->Fact = DOFACT: A is factored "from scratch"
 *
 *      In this case the user must also supply
 *
 *        o  A, the input matrix
 *
 *        as well as the following options to determine what matrix to
 *        factorize.
 *
 *        o  options->Equil,   to specify how to scale the rows and columns
 *                             of A to "equilibrate" it (to try to reduce its
 *                             condition number and so improve the
 *                             accuracy of the computed solution)
 *
 *        o  options->RowPerm, to specify how to permute the rows of A
 *                             (typically to control numerical stability)
 *
 *        o  options->ColPerm, to specify how to permute the columns of A
 *                             (typically to control fill-in and enhance
 *                             parallelism during factorization)
 *
 *        o  options->ReplaceTinyPivot, to specify how to deal with tiny
 *                             pivots encountered during factorization
 *                             (to control numerical stability)
 *
 *      The outputs returned include
 *         
 *        o  ScalePermstruct,  modified to describe how the input matrix A
 *                             was equilibrated and permuted:
 *          .  ScalePermstruct->DiagScale, indicates whether the rows and/or
 *                                         columns of A were scaled
 *          .  ScalePermstruct->R, array of row scale factors
 *          .  ScalePermstruct->C, array of column scale factors
 *          .  ScalePermstruct->perm_r, row permutation vector
 *          .  ScalePermstruct->perm_c, column permutation vector
 *
 *          (part of ScalePermstruct may also need to be supplied on input,
 *           depending on options->RowPerm and options->ColPerm as described 
 *           later).
 *
 *        o  A, the input matrix A overwritten by the scaled and permuted
 *              matrix diag(R)*A*diag(C)*Pc^T, where 
 *              Pc is the column permutation matrix determined by
 *                  ScalePermstruct->perm_c
 *              diag(R) and diag(C) are diagonal scaling matrices determined
 *                  by ScalePermstruct->DiagScale, ScalePermstruct->R and 
 *                  ScalePermstruct->C
 *
 *        o  LUstruct, which contains the L and U factorization of A1 where
 *
 *                A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 *               (Note that A1 = Pc*Pr*Aout, where Aout is the matrix stored
 *                in A on output.)
 *
 *   3. The second value of options->Fact assumes that a matrix with the same
 *      sparsity pattern as A has already been factored:
 *     
 *      -  options->Fact = SamePattern: A is factored, assuming that it has
 *            the same nonzero pattern as a previously factored matrix. In
 *            this case the algorithm saves time by reusing the previously
 *            computed column permutation vector stored in
 *            ScalePermstruct->perm_c and the "elimination tree" of A
 *            stored in LUstruct->etree
 *
 *      In this case the user must still specify the following options
 *      as before:
 *
 *        o  options->Equil
 *        o  options->RowPerm
 *        o  options->ReplaceTinyPivot
 *
 *      but not options->ColPerm, whose value is ignored. This is because the
 *      previous column permutation from ScalePermstruct->perm_c is used as
 *      input. The user must also supply 
 *
 *        o  A, the input matrix
 *        o  ScalePermstruct->perm_c, the column permutation
 *        o  LUstruct->etree, the elimination tree
 *
 *      The outputs returned include
 *         
 *        o  A, the input matrix A overwritten by the scaled and permuted
 *              matrix as described above
 *        o  ScalePermstruct, modified to describe how the input matrix A was
 *                            equilibrated and row permuted
 *        o  LUstruct, modified to contain the new L and U factors
 *
 *   4. The third value of options->Fact assumes that a matrix B with the same
 *      sparsity pattern as A has already been factored, and where the
 *      row permutation of B can be reused for A. This is useful when A and B
 *      have similar numerical values, so that the same row permutation
 *      will make both factorizations numerically stable. This lets us reuse
 *      all of the previously computed structure of L and U.
 *
 *      -  options->Fact = SamePattern_SameRowPerm: A is factored,
 *            assuming not only the same nonzero pattern as the previously
 *            factored matrix B, but reusing B's row permutation.
 *
 *      In this case the user must still specify the following options
 *      as before:
 *
 *        o  options->Equil
 *        o  options->ReplaceTinyPivot
 *
 *      but not options->RowPerm or options->ColPerm, whose values are
 *      ignored. This is because the permutations from ScalePermstruct->perm_r
 *      and ScalePermstruct->perm_c are used as input.
 *
 *      The user must also supply 
 *
 *        o  A, the input matrix
 *        o  ScalePermstruct->DiagScale, how the previous matrix was row
 *                                       and/or column scaled
 *        o  ScalePermstruct->R, the row scalings of the previous matrix,
 *                               if any
 *        o  ScalePermstruct->C, the column scalings of the previous matrix, 
 *                               if any
 *        o  ScalePermstruct->perm_r, the row permutation of the previous
 *                                    matrix
 *        o  ScalePermstruct->perm_c, the column permutation of the previous 
 *                                    matrix
 *        o  all of LUstruct, the previously computed information about
 *                            L and U (the actual numerical values of L and U
 *                            stored in LUstruct->Llu are ignored)
 *
 *      The outputs returned include
 *         
 *        o  A, the input matrix A overwritten by the scaled and permuted
 *              matrix as described above
 *        o  ScalePermstruct,  modified to describe how the input matrix A was
 *                             equilibrated (thus ScalePermstruct->DiagScale,
 *                             R and C may be modified)
 *        o  LUstruct, modified to contain the new L and U factors
 *
 *   5. The fourth and last value of options->Fact assumes that A is
 *      identical to a matrix that has already been factored on a previous 
 *      call, and reuses its entire LU factorization
 *
 *      -  options->Fact = FACTORED: A is identical to a previously
 *            factorized matrix, so the entire previous factorization
 *            can be reused.
 *
 *      In this case all the other options mentioned above are ignored
 *      (options->Equil, options->RowPerm, options->ColPerm, 
 *       options->ReplaceTinyPivot)
 *
 *      The user must also supply 
 *
 *        o  A, the unfactored matrix, only in the case that iterative
 *              refinement is to be done (specifically A must be the output
 *              A from the previous call, so that it has been scaled and permuted)
 *        o  all of ScalePermstruct
 *        o  all of LUstruct, including the actual numerical values of
 *           L and U
 *
 *      all of which are unmodified on output.
 *         
 * Arguments
 * =========
 *
 * options (input) superlu_dist_options_t* (global)
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following fields should be defined for this structure:
 *         
 *         o Fact (fact_t)
 *           Specifies whether or not the factored form of the matrix
 *           A is supplied on entry, and if not, how the matrix A should
 *           be factorized based on the previous history.
 *
 *           = DOFACT: The matrix A will be factorized from scratch.
 *                 Inputs:  A
 *                          options->Equil, RowPerm, ColPerm, ReplaceTinyPivot
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          all of ScalePermstruct
 *                          all of LUstruct
 *
 *           = SamePattern: the matrix A will be factorized assuming
 *             that a factorization of a matrix with the same sparsity
 *             pattern was performed prior to this one. Therefore, this
 *             factorization will reuse column permutation vector 
 *             ScalePermstruct->perm_c and the elimination tree
 *             LUstruct->etree
 *                 Inputs:  A
 *                          options->Equil, RowPerm, ReplaceTinyPivot
 *                          ScalePermstruct->perm_c
 *                          LUstruct->etree
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          rest of ScalePermstruct (DiagScale, R, C, perm_r)
 *                          rest of LUstruct (GLU_persist, Llu)
 *
 *           = SamePattern_SameRowPerm: the matrix A will be factorized
 *             assuming that a factorization of a matrix with the same
 *             sparsity pattern and similar numerical values was performed
 *             prior to this one. Therefore, this factorization will reuse
 *             both the row and column scaling factors R and C, both the
 *             row and column permutation vectors perm_r and perm_c, and the
 *             distributed data structure set up from the previous symbolic
 *             factorization.
 *                 Inputs:  A
 *                          options->Equil, ReplaceTinyPivot
 *                          all of ScalePermstruct
 *                          all of LUstruct
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          modified LUstruct->Llu
 *           = FACTORED: the matrix A is already factored.
 *                 Inputs:  all of ScalePermstruct
 *                          all of LUstruct
 *
 *         o Equil (yes_no_t)
 *           Specifies whether to equilibrate the system.
 *           = NO:  no equilibration.
 *           = YES: scaling factors are computed to equilibrate the system:
 *                      diag(R)*A*diag(C)*inv(diag(C))*X = diag(R)*B.
 *                  Whether or not the system will be equilibrated depends
 *                  on the scaling of the matrix A, but if equilibration is
 *                  used, A is overwritten by diag(R)*A*diag(C) and B by
 *                  diag(R)*B.
 *
 *         o RowPerm (rowperm_t)
 *           Specifies how to permute rows of the matrix A.
 *           = NATURAL:   use the natural ordering.
 *           = LargeDiag: use the Duff/Koster algorithm to permute rows of
 *                        the original matrix to make the diagonal large
 *                        relative to the off-diagonal.
 *           = MY_PERMR:  use the ordering given in ScalePermstruct->perm_r
 *                        input by the user.
 *           
 *         o ColPerm (colperm_t)
 *           Specifies what type of column permutation to use to reduce fill.
 *           = NATURAL:       natural ordering.
 *           = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A.
 *           = MMD_ATA:       minimum degree ordering on structure of A'*A.
 *           = MY_PERMC:      the ordering given in ScalePermstruct->perm_c.
 *         
 *         o ReplaceTinyPivot (yes_no_t)
 *           = NO:  do not modify pivots
 *           = YES: replace tiny pivots by sqrt(epsilon)*norm(A) during 
 *                  LU factorization.
 *
 *         o IterRefine (IterRefine_t)
 *           Specifies how to perform iterative refinement.
 *           = NO:     no iterative refinement.
 *           = SLU_DOUBLE: accumulate residual in double precision.
 *           = SLU_EXTRA:  accumulate residual in extra precision.
 *
 *         NOTE: all options must be identical on all processes when
 *               calling this routine.
 *
 * A (input/output) SuperMatrix* (local)
 *         On entry, matrix A in A*X=B, of dimension (A->nrow, A->ncol).
 *           The number of linear equations is A->nrow. The type of A must be:
 *           Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE.
 *           That is, A is stored in distributed compressed row format.
 *           See supermatrix.h for the definition of 'SuperMatrix'.
 *           This routine only handles square A, however, the LU factorization
 *           routine PZGSTRF can factorize rectangular matrices.
 *         On exit, A may be overwritten by diag(R)*A*diag(C)*Pc^T,
 *           depending on ScalePermstruct->DiagScale and options->ColPerm:
 *             if ScalePermstruct->DiagScale != NOEQUIL, A is overwritten by
 *                diag(R)*A*diag(C).
 *             if options->ColPerm != NATURAL, A is further overwritten by
 *                diag(R)*A*diag(C)*Pc^T.
 *           If all the above conditions are true, the LU decomposition is
 *           performed on the matrix Pc*Pr*diag(R)*A*diag(C)*Pc^T.
 *
 * ScalePermstruct (input/output) ScalePermstruct_t* (global)
 *         The data structure to store the scaling and permutation vectors
 *         describing the transformations performed to the matrix A.
 *         It contains the following fields:
 *
 *         o DiagScale (DiagScale_t)
 *           Specifies the form of equilibration that was done.
 *           = NOEQUIL: no equilibration.
 *           = ROW:     row equilibration, i.e., A was premultiplied by
 *                      diag(R).
 *           = COL:     Column equilibration, i.e., A was postmultiplied
 *                      by diag(C).
 *           = BOTH:    both row and column equilibration, i.e., A was 
 *                      replaced by diag(R)*A*diag(C).
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm,
 *           DiagScale is an input argument; otherwise it is an output
 *           argument.
 *
 *         o perm_r (int*)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *           If options->RowPerm = MY_PERMR, or
 *           options->Fact = SamePattern_SameRowPerm, perm_r is an
 *           input argument; otherwise it is an output argument.
 *
 *         o perm_c (int*)
 *           Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *           If options->ColPerm = MY_PERMC or options->Fact = SamePattern
 *           or options->Fact = SamePattern_SameRowPerm, perm_c is an
 *           input argument; otherwise, it is an output argument.
 *           On exit, perm_c may be overwritten by the product of the input
 *           perm_c and a permutation that postorders the elimination tree
 *           of Pc*A'*A*Pc'; perm_c is not changed if the elimination tree
 *           is already in postorder.
 *
 *         o R (double*) dimension (A->nrow)
 *           The row scale factors for A.
 *           If DiagScale = ROW or BOTH, A is multiplied on the left by 
 *                          diag(R).
 *           If DiagScale = NOEQUIL or COL, R is not defined.
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm, R is
 *           an input argument; otherwise, R is an output argument.
 *
 *         o C (double*) dimension (A->ncol)
 *           The column scale factors for A.
 *           If DiagScale = COL or BOTH, A is multiplied on the right by 
 *                          diag(C).
 *           If DiagScale = NOEQUIL or ROW, C is not defined.
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm, C is
 *           an input argument; otherwise, C is an output argument.
 *         
 * B       (input/output) doublecomplex* (local)
 *         On entry, the right-hand side matrix of dimension (m_loc, nrhs),
 *           where, m_loc is the number of rows stored locally on my
 *           process and is defined in the data structure of matrix A.
 *         On exit, the solution matrix if info = 0;
 *
 * ldb     (input) int (local)
 *         The leading dimension of matrix B.
 *
 * nrhs    (input) int (global)
 *         The number of right-hand sides.
 *         If nrhs = 0, only LU decomposition is performed, the forward
 *         and back substitutions are skipped.
 *
 * grid    (input) gridinfo_t* (global)
 *         The 2D process mesh. It contains the MPI communicator, the number
 *         of process rows (NPROW), the number of process columns (NPCOL),
 *         and my process rank. It is an input argument to all the
 *         parallel routines.
 *         Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *         See superlu_zdefs.h for the definition of 'gridinfo_t'.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         It contains the following fields:
 *
 *         o etree (int*) dimension (A->ncol) (global)
 *           Elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc'.
 *           It is computed in sp_colorder() during the first factorization,
 *           and is reused in the subsequent factorizations of the matrices
 *           with the same nonzero pattern.
 *           On exit of sp_colorder(), the columns of A are permuted so that
 *           the etree is in a certain postorder. This postorder is reflected
 *           in ScalePermstruct->perm_c.
 *           NOTE:
 *           Etree is a vector of parent pointers for a forest whose vertices
 *           are the integers 0 to A->ncol-1; etree[root]==A->ncol.
 *
 *         o Glu_persist (Glu_persist_t*) (global)
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *	       xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (LocalLU_t*) (local)
 *           The distributed data structures to store L and U factors.
 *           See superlu_zdefs.h for the definition of 'LocalLU_t'.
 *
 * SOLVEstruct (input/output) SOLVEstruct_t*
 *         The data structure to hold the communication pattern used
 *         in the phases of triangular solution and iterative refinement.
 *         This pattern should be initialized only once for repeated solutions.
 *         If options->SolveInitialized = YES, it is an input argument.
 *         If options->SolveInitialized = NO and nrhs != 0, it is an output
 *         argument. See superlu_zdefs.h for the definition of 'SOLVEstruct_t'.
 *
 * berr    (output) double*, dimension (nrhs) (global)
 *         The componentwise relative backward error of each solution   
 *         vector X(j) (i.e., the smallest relative change in   
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info    (output) int*
 *         = 0: successful exit
 *         < 0: if info = -i, the i-th argument had an illegal value
 *         > 0: if info = i, and i is
 *             <= A->ncol: U(i,i) is exactly zero. The factorization has
 *                been completed, but the factor U is exactly singular,
 *                so the solution could not be computed.
 *             > A->ncol: number of bytes allocated when memory allocation
 *                failure occurred, plus A->ncol.
 *
 * See superlu_zdefs.h for the definitions of various data types.
 * 
*/ void pzgssvx(superlu_dist_options_t *options, SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, doublecomplex B[], int ldb, int nrhs, gridinfo_t *grid, LUstruct_t *LUstruct, SOLVEstruct_t *SOLVEstruct, double *berr, SuperLUStat_t *stat, int *info) { NRformat_loc *Astore; SuperMatrix GA; /* Global A in NC format */ NCformat *GAstore; doublecomplex *a_GA; SuperMatrix GAC; /* Global A in NCP format (add n end pointers) */ NCPformat *GACstore; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; Glu_freeable_t *Glu_freeable; /* The nonzero structures of L and U factors, which are replicated on all processrs. (lsub, xlsub) contains the compressed subscript of supernodes in L. (usub, xusub) contains the compressed subscript of nonzero segments in U. If options->Fact != SamePattern_SameRowPerm, they are computed by SYMBFACT routine, and then used by PDDISTRIBUTE routine. They will be freed after PDDISTRIBUTE routine. If options->Fact == SamePattern_SameRowPerm, these structures are not used. */ fact_t Fact; doublecomplex *a; int_t *colptr, *rowind; int_t *perm_r; /* row permutations from partial pivoting */ int_t *perm_c; /* column permutation vector */ int_t *etree; /* elimination tree */ int_t *rowptr, *colind; /* Local A in NR*/ int_t colequ, Equil, factored, job, notran, rowequ, need_value; int_t i, iinfo, j, irow, m, n, nnz, permc_spec; int_t nnz_loc, m_loc, fst_row, icol; int iam; int ldx; /* LDA for matrix X (local). 
*/ char equed[1], norm[1]; double *C, *R, *C1, *R1, amax, anorm, colcnd, rowcnd; doublecomplex *X, *b_col, *b_work, *x_col; double t; float GA_mem_use; /* memory usage by global A */ float dist_mem_use; /* memory usage during distribution */ superlu_dist_mem_usage_t num_mem_usage, symb_mem_usage; #if ( PRNTlevel>= 2 ) double dmin, dsum, dprod; #endif /* Structures needed for parallel symbolic factorization */ int_t *sizes, *fstVtxSep, parSymbFact; int noDomains, nprocs_num; MPI_Comm symb_comm; /* communicator for symbolic factorization */ int col, key; /* parameters for creating a new communicator */ Pslu_freeable_t Pslu_freeable; float flinfo; /* Initialization. */ m = A->nrow; n = A->ncol; Astore = (NRformat_loc *) A->Store; nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; fst_row = Astore->fst_row; a = (doublecomplex *) Astore->nzval; rowptr = Astore->rowptr; colind = Astore->colind; sizes = NULL; fstVtxSep = NULL; symb_comm = MPI_COMM_NULL; /* Test the input parameters. */ *info = 0; Fact = options->Fact; if ( Fact < 0 || Fact > FACTORED ) *info = -1; else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR ) *info = -1; else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC ) *info = -1; else if ( options->IterRefine < 0 || options->IterRefine > SLU_EXTRA ) *info = -1; else if ( options->IterRefine == SLU_EXTRA ) { *info = -1; printf("ERROR: Extra precise iterative refinement yet to support.\n"); } else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc || A->Dtype != SLU_Z || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < m_loc ) *info = -5; else if ( nrhs < 0 ) *info = -6; if ( sp_ienv_dist(2) > sp_ienv_dist(3) ) { *info = 1; printf("ERROR: Relaxation (NREL) cannot be larger than max. 
supernode size (NSUP).\n" "\t-> Check parameter setting in sp_ienv_dist.c to correct error.\n"); } if ( *info ) { i = -(*info); pxerr_dist("pzgssvx", grid, -*info); return; } factored = (Fact == FACTORED); Equil = (!factored && options->Equil == YES); notran = (options->Trans == NOTRANS); parSymbFact = options->ParSymbFact; iam = grid->iam; job = 5; if ( factored || (Fact == SamePattern_SameRowPerm && Equil) ) { rowequ = (ScalePermstruct->DiagScale == ROW) || (ScalePermstruct->DiagScale == BOTH); colequ = (ScalePermstruct->DiagScale == COL) || (ScalePermstruct->DiagScale == BOTH); } else rowequ = colequ = FALSE; /* The following arrays are replicated on all processes. */ perm_r = ScalePermstruct->perm_r; perm_c = ScalePermstruct->perm_c; etree = LUstruct->etree; R = ScalePermstruct->R; C = ScalePermstruct->C; /********/ #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgssvx()"); #endif /* Not factored & ask for equilibration */ if ( Equil && Fact != SamePattern_SameRowPerm ) { /* Allocate storage if not done so before. */ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->R = R; ScalePermstruct->C = C; break; case ROW: if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->C = C; break; case COL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); ScalePermstruct->R = R; break; } } /* ------------------------------------------------------------ * Diagonal scaling to equilibrate the matrix. 
(simple scheme) * for row i = 1:n, A(i,:) <- A(i,:) / max(abs(A(i,:)); * for column j = 1:n, A(:,j) <- A(:, j) / max(abs(A(:,j)) * ------------------------------------------------------------*/ if ( Equil ) { #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter equil"); #endif t = SuperLU_timer_(); if ( Fact == SamePattern_SameRowPerm ) { /* Reuse R and C. */ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: break; case ROW: irow = fst_row; for (j = 0; j < m_loc; ++j) { for (i = rowptr[j]; i < rowptr[j+1]; ++i) { zd_mult(&a[i], &a[i], R[irow]); /* Scale rows */ } ++irow; } break; case COL: for (j = 0; j < m_loc; ++j) for (i = rowptr[j]; i < rowptr[j+1]; ++i){ icol = colind[i]; zd_mult(&a[i], &a[i], C[icol]); /* Scale columns */ } break; case BOTH: irow = fst_row; for (j = 0; j < m_loc; ++j) { for (i = rowptr[j]; i < rowptr[j+1]; ++i) { icol = colind[i]; zd_mult(&a[i], &a[i], R[irow]); /* Scale rows */ zd_mult(&a[i], &a[i], C[icol]); /* Scale columns */ } ++irow; } break; } } else { /* Compute R & C from scratch */ /* Compute the row and column scalings. */ pzgsequ(A, R, C, &rowcnd, &colcnd, &amax, &iinfo, grid); if ( iinfo > 0 ) { if ( iinfo <= m ) { #if ( PRNTlevel>=1 ) fprintf(stderr, "The " IFMT "-th row of A is exactly zero\n", iinfo); #endif } else { #if ( PRNTlevel>=1 ) fprintf(stderr, "The " IFMT "-th column of A is exactly zero\n", iinfo-n); #endif } } else if ( iinfo < 0 ) return; /* Now iinfo == 0 */ /* Equilibrate matrix A if it is badly-scaled. A <-- diag(R)*A*diag(C) */ pzlaqgs(A, R, C, rowcnd, colcnd, amax, equed); if ( strncmp(equed, "R", 1)==0 ) { ScalePermstruct->DiagScale = ROW; rowequ = ROW; } else if ( strncmp(equed, "C", 1)==0 ) { ScalePermstruct->DiagScale = COL; colequ = COL; } else if ( strncmp(equed, "B", 1)==0 ) { ScalePermstruct->DiagScale = BOTH; rowequ = ROW; colequ = COL; } else ScalePermstruct->DiagScale = NOEQUIL; #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. equilibrated? 
*equed = %c\n", *equed); fflush(stdout); } #endif } /* end if Fact ... */ stat->utime[EQUIL] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit equil"); #endif } /* end if Equil ... LAPACK style, not involving MC64 */ if ( !factored ) { /* Skip this if already factored. */ /* * For serial symbolic factorization, gather A from the distributed * compressed row format to global A in compressed column format. * Numerical values are gathered only when a row permutation * for large diagonal is sought after. */ if ( Fact != SamePattern_SameRowPerm && (parSymbFact == NO || options->RowPerm != NO) ) { /* Performs serial symbolic factorzation and/or MC64 */ need_value = (options->RowPerm == LargeDiag); pzCompRow_loc_to_CompCol_global(need_value, A, grid, &GA); GAstore = (NCformat *) GA.Store; colptr = GAstore->colptr; rowind = GAstore->rowind; nnz = GAstore->nnz; GA_mem_use = (nnz + n + 1) * sizeof(int_t); if ( need_value ) { a_GA = (doublecomplex *) GAstore->nzval; GA_mem_use += nnz * sizeof(doublecomplex); } else assert(GAstore->nzval == NULL); } /* ------------------------------------------------------------ Find the row permutation Pr for A, and apply Pr*[GA]. GA is overwritten by Pr*[GA]. ------------------------------------------------------------*/ if ( options->RowPerm != NO ) { t = SuperLU_timer_(); if ( Fact != SamePattern_SameRowPerm ) { if ( options->RowPerm == MY_PERMR ) { /* Use user's perm_r. */ /* Permute the global matrix GA for symbfact() */ for (i = 0; i < colptr[n]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } else { /* options->RowPerm == LargeDiag */ /* Get a new perm_r[] */ if ( job == 5 ) { /* Allocate storage for scaling factors. 
*/ if ( !(R1 = doubleMalloc_dist(m)) ) ABORT("SUPERLU_MALLOC fails for R1[]"); if ( !(C1 = doubleMalloc_dist(n)) ) ABORT("SUPERLU_MALLOC fails for C1[]"); } if ( !iam ) { /* Process 0 finds a row permutation */ iinfo = zldperm_dist(job, m, nnz, colptr, rowind, a_GA, perm_r, R1, C1); MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } } else { MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } } if ( iinfo && job == 5) { /* Error return */ SUPERLU_FREE(R1); SUPERLU_FREE(C1); } #if ( PRNTlevel>=2 ) dmin = dmach_dist("Overflow"); dsum = 0.0; dprod = 1.0; #endif if ( iinfo == 0 ) { if ( job == 5 ) { if ( Equil ) { for (i = 0; i < n; ++i) { R1[i] = exp(R1[i]); C1[i] = exp(C1[i]); } /* Scale the distributed matrix further. A <-- diag(R1)*A*diag(C1) */ irow = fst_row; for (j = 0; j < m_loc; ++j) { for (i = rowptr[j]; i < rowptr[j+1]; ++i) { icol = colind[i]; zd_mult(&a[i], &a[i], R1[irow]); zd_mult(&a[i], &a[i], C1[icol]); #if ( PRNTlevel>=2 ) if ( perm_r[irow] == icol ) { /* New diagonal */ if ( job == 2 || job == 3 ) dmin = SUPERLU_MIN(dmin, slud_z_abs1(&a[i])); else if ( job == 4 ) dsum += slud_z_abs1(&a[i]); else if ( job == 5 ) dprod *= slud_z_abs1(&a[i]); } #endif } ++irow; } /* Multiply together the scaling factors -- R/C from simple scheme, R1/C1 from MC64. 
*/ if ( rowequ ) for (i = 0; i < m; ++i) R[i] *= R1[i]; else for (i = 0; i < m; ++i) R[i] = R1[i]; if ( colequ ) for (i = 0; i < n; ++i) C[i] *= C1[i]; else for (i = 0; i < n; ++i) C[i] = C1[i]; ScalePermstruct->DiagScale = BOTH; rowequ = colequ = 1; } /* end Equil */ /* Now permute global GA to prepare for symbfact() */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } SUPERLU_FREE (R1); SUPERLU_FREE (C1); } else { /* job = 2,3,4 */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } /* end for i ... */ } /* end for j ... */ } /* end else job ... */ } else { /* if iinfo != 0 */ for (i = 0; i < m; ++i) perm_r[i] = i; } #if ( PRNTlevel>=2 ) if ( job == 2 || job == 3 ) { if ( !iam ) printf("\tsmallest diagonal %e\n", dmin); } else if ( job == 4 ) { if ( !iam ) printf("\tsum of diagonal %e\n", dsum); } else if ( job == 5 ) { if ( !iam ) printf("\t product of diagonal %e\n", dprod); } #endif } /* end if options->RowPerm ... */ t = SuperLU_timer_() - t; stat->utime[ROWPERM] = t; #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. LDPERM job " IFMT "\t time: %.2f\n", job, t); fflush(stdout); } #endif } /* end if Fact ... */ } else { /* options->RowPerm == NOROWPERM / NATURAL */ for (i = 0; i < m; ++i) perm_r[i] = i; } #if ( DEBUGlevel>=2 ) if ( !iam ) PrintInt10("perm_r", m, perm_r); #endif } /* end if (!factored) */ if ( !factored || options->IterRefine ) { /* Compute norm(A), which will be used to adjust small diagonal. */ if ( notran ) *(unsigned char *)norm = '1'; else *(unsigned char *)norm = 'I'; anorm = pzlangs(norm, A, grid); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. anorm %e\n", anorm); fflush(stdout); } #endif } /* ------------------------------------------------------------ Perform the LU factorization: symbolic factorization, redistribution, and numerical factorization. 
------------------------------------------------------------*/ if ( !factored ) { t = SuperLU_timer_(); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = METIS_AT_PLUS_A: METIS on structure of A'+A * permc_spec = PARMETIS: parallel METIS on structure of A'+A * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( parSymbFact == YES || permc_spec == PARMETIS ) { nprocs_num = grid->nprow * grid->npcol; noDomains = (int) ( pow(2, ((int) LOG2( nprocs_num )))); /* create a new communicator for the first noDomains processes in grid->comm */ key = iam; if (iam < noDomains) col = 0; else col = MPI_UNDEFINED; MPI_Comm_split (grid->comm, col, key, &symb_comm ); if ( permc_spec == NATURAL || permc_spec == MY_PERMC ) { if ( permc_spec == NATURAL ) { for (j = 0; j < n; ++j) perm_c[j] = j; } if ( !(sizes = intMalloc_dist(2 * noDomains)) ) ABORT("SUPERLU_MALLOC fails for sizes."); if ( !(fstVtxSep = intMalloc_dist(2 * noDomains)) ) ABORT("SUPERLU_MALLOC fails for fstVtxSep."); for (i = 0; i < 2*noDomains - 2; ++i) { sizes[i] = 0; fstVtxSep[i] = 0; } sizes[2*noDomains - 2] = m; fstVtxSep[2*noDomains - 2] = 0; } else if ( permc_spec != PARMETIS ) { /* same as before */ printf("{" IFMT "," IFMT "}: pzgssvx: invalid ColPerm option when ParSymbfact is used\n", MYROW(grid->iam, grid), MYCOL(grid->iam, grid)); } } if ( permc_spec != MY_PERMC && Fact == DOFACT ) { /* Reuse perm_c if Fact == SamePattern, or SamePattern_SameRowPerm */ if ( permc_spec == PARMETIS ) { /* Get column permutation vector in perm_c. * * This routine takes as input the distributed input matrix A * * and does not modify it. It also allocates memory for * * sizes[] and fstVtxSep[] arrays, that contain information * * on the separator tree computed by ParMETIS. 
*/ flinfo = get_perm_c_parmetis(A, perm_r, perm_c, nprocs_num, noDomains, &sizes, &fstVtxSep, grid, &symb_comm); if (flinfo > 0) { #if ( PRNTlevel>=1 ) fprintf(stderr, "Insufficient memory for get_perm_c parmetis\n"); #endif *info = flinfo; return; } } else { get_perm_c_dist(iam, permc_spec, &GA, perm_c); } } stat->utime[COLPERM] = SuperLU_timer_() - t; /* Compute the elimination tree of Pc*(A^T+A)*Pc^T or Pc*A^T*A*Pc^T (a.k.a. column etree), depending on the choice of ColPerm. Adjust perm_c[] to be consistent with a postorder of etree. Permute columns of A to form A*Pc'. */ if ( Fact != SamePattern_SameRowPerm ) { if ( parSymbFact == NO ) { /* Perform serial symbolic factorization */ /* GA = Pr*A, perm_r[] is already applied. */ int_t *GACcolbeg, *GACcolend, *GACrowind; /* After this routine, GAC = GA*Pc^T. */ sp_colorder(options, &GA, perm_c, etree, &GAC); /* Form Pc*A*Pc^T to preserve the diagonal of the matrix GAC. */ GACstore = (NCPformat *) GAC.Store; GACcolbeg = GACstore->colbeg; GACcolend = GACstore->colend; GACrowind = GACstore->rowind; for (j = 0; j < n; ++j) { for (i = GACcolbeg[j]; i < GACcolend[j]; ++i) { irow = GACrowind[i]; GACrowind[i] = perm_c[irow]; } } /* Perform a symbolic factorization on Pc*Pr*A*Pc^T and set up the nonzero data structures for L & U. */ #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. symbfact(): relax " IFMT ", maxsuper " IFMT ", fill " IFMT "\n", sp_ienv_dist(2), sp_ienv_dist(3), sp_ienv_dist(6)); fflush(stdout); } #endif t = SuperLU_timer_(); if ( !(Glu_freeable = (Glu_freeable_t *) SUPERLU_MALLOC(sizeof(Glu_freeable_t))) ) ABORT("Malloc fails for Glu_freeable."); /* Every process does this. 
*/ iinfo = symbfact(options, iam, &GAC, perm_c, etree, Glu_persist, Glu_freeable); stat->utime[SYMBFAC] = SuperLU_timer_() - t; if ( iinfo <= 0 ) { /* Successful return */ QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage); #if ( PRNTlevel>=1 ) if ( !iam ) { printf("\tNo of supers " IFMT "\n", (long long) Glu_persist->supno[n-1]+1); printf("\tSize of G(L) " IFMT "\n", (long long) Glu_freeable->xlsub[n]); printf("\tSize of G(U) " IFMT "\n", (long long) Glu_freeable->xusub[n]); printf("\tint %d, short %d, float %d, double %d\n", (int) sizeof(int_t), (int) sizeof(short), (int) sizeof(float), (int) sizeof(double)); printf("\tSYMBfact (MB):\tL\\U %.2f\ttotal %.2f\texpansions " IFMT "\n", symb_mem_usage.for_lu*1e-6, symb_mem_usage.total*1e-6, symb_mem_usage.expansions); fflush(stdout); } #endif } else { /* symbfact out of memory */ #if ( PRNTlevel>=1 ) if ( !iam ) fprintf(stderr,"symbfact() error returns " IFMT "\n",iinfo); #endif *info = iinfo; return; } } /* end serial symbolic factorization */ else { /* parallel symbolic factorization */ t = SuperLU_timer_(); flinfo = symbfact_dist(nprocs_num, noDomains, A, perm_c, perm_r, sizes, fstVtxSep, &Pslu_freeable, &(grid->comm), &symb_comm, &symb_mem_usage); stat->utime[SYMBFAC] = SuperLU_timer_() - t; if (flinfo > 0) { #if ( PRNTlevel>=1 ) fprintf(stderr, "Insufficient memory for parallel symbolic factorization."); #endif *info = flinfo; return; } } /* Destroy global GA */ if ( parSymbFact == NO || options->RowPerm != NO ) Destroy_CompCol_Matrix_dist(&GA); if ( parSymbFact == NO ) Destroy_CompCol_Permuted_dist(&GAC); } /* end if Fact ... 
*/ if (sizes) SUPERLU_FREE (sizes); if (fstVtxSep) SUPERLU_FREE (fstVtxSep); if (symb_comm != MPI_COMM_NULL) MPI_Comm_free (&symb_comm); if (parSymbFact == NO || Fact == SamePattern_SameRowPerm) { /* CASE OF SERIAL SYMBOLIC */ /* Apply column permutation to the original distributed A */ for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]]; /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc^T into L and U storage. NOTE: the row permutation Pc*Pr is applied internally in the distribution routine. */ t = SuperLU_timer_(); dist_mem_use = pzdistribute(Fact, n, A, ScalePermstruct, Glu_freeable, LUstruct, grid); stat->utime[DIST] = SuperLU_timer_() - t; /* Deallocate storage used in symbolic factorization. */ if ( Fact != SamePattern_SameRowPerm ) { iinfo = symbfact_SubFree(Glu_freeable); SUPERLU_FREE(Glu_freeable); } } else { /* CASE OF PARALLEL SYMBOLIC */ /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc' into L and U storage. NOTE: the row permutation Pc*Pr is applied internally in the distribution routine. */ /* Apply column permutation to the original distributed A */ for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]]; t = SuperLU_timer_(); dist_mem_use = zdist_psymbtonum(Fact, n, A, ScalePermstruct, &Pslu_freeable, LUstruct, grid); if (dist_mem_use > 0) ABORT ("Not enough memory available for dist_psymbtonum\n"); stat->utime[DIST] = SuperLU_timer_() - t; } /*if (!iam) printf ("\tDISTRIBUTE time %8.2f\n", stat->utime[DIST]);*/ /* Perform numerical factorization in parallel. 
*/ t = SuperLU_timer_(); pzgstrf(options, m, n, anorm, LUstruct, grid, stat, info); stat->utime[FACT] = SuperLU_timer_() - t; #if 0 // #ifdef GPU_PROF // if(!iam ) // { // char* ttemp; // ttemp = getenv("IO_FILE"); // if(ttemp!=NULL) // { // printf("File being opend is %s\n",ttemp ); // FILE* fp; // fp = fopen(ttemp,"w"); // if(!fp) // { // fprintf(stderr," Couldn't open output file %s\n",ttemp); // } // int nsup=Glu_persist->supno[n-1]+1; // int ii; // for (ii = 0; ii < nsup; ++ii) // { // fprintf(fp,"%d,%d,%d,%d,%d,%d\n",gs1.mnk_min_stats[ii],gs1.mnk_min_stats[ii+nsup], // gs1.mnk_min_stats[ii+2*nsup], // gs1.mnk_max_stats[ii],gs1.mnk_max_stats[ii+nsup],gs1.mnk_max_stats[ii+2*nsup]); // } // // lastly put the timeing stats that we need // fprintf(fp,"Min %lf Max %lf totaltime %lf \n",gs1.osDgemmMin,gs1.osDgemmMax,stat->utime[FACT]); // fclose(fp); // } // } // #endif #endif if ( options->PrintStat ) { int_t TinyPivots; float for_lu, total, max, avg, temp; zQuerySpace_dist(n, LUstruct, grid, stat, &num_mem_usage); if (parSymbFact == TRUE) { /* The memory used in the redistribution routine includes the memory used for storing the symbolic structure and the memory allocated for numerical factorization */ temp = SUPERLU_MAX(symb_mem_usage.total, -dist_mem_use); if ( options->RowPerm != NO ) temp = SUPERLU_MAX(temp, GA_mem_use); } else { temp = SUPERLU_MAX ( symb_mem_usage.total + GA_mem_use, /* symbfact step */ symb_mem_usage.for_lu + dist_mem_use + num_mem_usage.for_lu /* distribution step */ ); } temp = SUPERLU_MAX(temp, num_mem_usage.total); MPI_Reduce( &temp, &max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &temp, &avg, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Allreduce( &stat->TinyPivots, &TinyPivots, 1, mpi_int_t, MPI_SUM, grid->comm ); stat->TinyPivots = TinyPivots; MPI_Reduce( &num_mem_usage.for_lu, &for_lu, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &num_mem_usage.total, &total, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); if (!iam) { 
printf("\n** Memory Usage **********************************\n"); printf("** NUMfact space (MB): (sum-of-all-processes)\n" " L\\U : %8.2f | Total : %8.2f\n", for_lu * 1e-6, total * 1e-6); printf("** Total highmark (MB):\n" " Sum-of-all : %8.2f | Avg : %8.2f | Max : %8.2f\n", avg * 1e-6, avg / grid->nprow / grid->npcol * 1e-6, max * 1e-6); printf("**************************************************\n"); fflush(stdout); } } /* end printing stats */ } /* end if (!factored) */ if ( options->Fact == DOFACT || options->Fact == SamePattern ) { /* Need to reset the solve's communication pattern, because perm_r[] and/or perm_c[] is changed. */ if ( options->SolveInitialized == YES ) { /* Initialized before */ zSolveFinalize(options, SOLVEstruct); /* Clean up structure */ options->SolveInitialized = NO; /* Reset the solve state */ } } #if 0 /* Need to revisit: Why the following is not good enough for X-to-B distribution -- inv_perm_c changed */ pxgstrs_finalize(SOLVEstruct->gstrs_comm); pxgstrs_init(A->ncol, m_loc, nrhs, fst_row, perm_r, perm_c, grid, LUstruct->Glu_persist, SOLVEstruct); #endif /* ------------------------------------------------------------ Compute the solution matrix X. ------------------------------------------------------------*/ if ( nrhs && *info == 0 ) { if ( !(b_work = doublecomplexMalloc_dist(n)) ) ABORT("Malloc fails for b_work[]"); /* ------------------------------------------------------------ Scale the right-hand side if equilibration was performed. ------------------------------------------------------------*/ if ( notran ) { if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { irow = fst_row; for (i = 0; i < m_loc; ++i) { zd_mult(&b_col[i], &b_col[i], R[irow]); ++irow; } b_col += ldb; } } } else if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { irow = fst_row; for (i = 0; i < m_loc; ++i) { zd_mult(&b_col[i], &b_col[i], C[irow]); ++irow; } b_col += ldb; } } /* Save a copy of the right-hand side. 
*/ ldx = ldb; if ( !(X = doublecomplexMalloc_dist(((size_t)ldx) * nrhs)) ) ABORT("Malloc fails for X[]"); x_col = X; b_col = B; for (j = 0; j < nrhs; ++j) { #if 0 /* Sherry */ for (i = 0; i < m_loc; ++i) x_col[i] = b_col[i]; #endif memcpy(x_col, b_col, m_loc * sizeof(doublecomplex)); x_col += ldx; b_col += ldb; } /* ------------------------------------------------------------ Solve the linear system. ------------------------------------------------------------*/ if ( options->SolveInitialized == NO ) { /* First time */ zSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid, SOLVEstruct); /* Inside this routine, SolveInitialized is set to YES. For repeated call to pzgssvx(), no need to re-initialilze the Solve data & communication structures, unless a new factorization with Fact == DOFACT or SamePattern is asked for. */ } pzgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc, fst_row, ldb, nrhs, SOLVEstruct, stat, info); /* ------------------------------------------------------------ Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. ------------------------------------------------------------*/ if ( options->IterRefine ) { /* Improve the solution by iterative refinement. */ int_t *it; int_t *colind_gsmv = SOLVEstruct->A_colind_gsmv; /* This was allocated and set to NULL in zSolveInit() */ SOLVEstruct_t *SOLVEstruct1; /* Used by refinement. */ t = SuperLU_timer_(); if ( options->RefineInitialized == NO || Fact == DOFACT ) { /* All these cases need to re-initialize gsmv structure */ if ( options->RefineInitialized ) pzgsmv_finalize(SOLVEstruct->gsmv_comm); pzgsmv_init(A, SOLVEstruct->row_to_proc, grid, SOLVEstruct->gsmv_comm); /* Save a copy of the transformed local col indices in colind_gsmv[]. 
*/ if ( colind_gsmv ) SUPERLU_FREE(colind_gsmv); if ( !(it = intMalloc_dist(nnz_loc)) ) ABORT("Malloc fails for colind_gsmv[]"); colind_gsmv = SOLVEstruct->A_colind_gsmv = it; for (i = 0; i < nnz_loc; ++i) colind_gsmv[i] = colind[i]; options->RefineInitialized = YES; } else if ( Fact == SamePattern || Fact == SamePattern_SameRowPerm ) { doublecomplex atemp; int_t k, jcol, p; /* Swap to beginning the part of A corresponding to the local part of X, as was done in pzgsmv_init() */ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ k = rowptr[i]; for (j = rowptr[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; p = SOLVEstruct->row_to_proc[jcol]; if ( p == iam ) { /* Local */ atemp = a[k]; a[k] = a[j]; a[j] = atemp; ++k; } } } /* Re-use the local col indices of A obtained from the previous call to pzgsmv_init() */ for (i = 0; i < nnz_loc; ++i) colind[i] = colind_gsmv[i]; } if ( nrhs == 1 ) { /* Use the existing solve structure */ SOLVEstruct1 = SOLVEstruct; } else { /* For nrhs > 1, since refinement is performed for RHS one at a time, the communication structure for pdgstrs is different than the solve with nrhs RHS. So we use SOLVEstruct1 for the refinement step. */ if ( !(SOLVEstruct1 = (SOLVEstruct_t *) SUPERLU_MALLOC(sizeof(SOLVEstruct_t))) ) ABORT("Malloc fails for SOLVEstruct1"); /* Copy the same stuff */ SOLVEstruct1->row_to_proc = SOLVEstruct->row_to_proc; SOLVEstruct1->inv_perm_c = SOLVEstruct->inv_perm_c; SOLVEstruct1->num_diag_procs = SOLVEstruct->num_diag_procs; SOLVEstruct1->diag_procs = SOLVEstruct->diag_procs; SOLVEstruct1->diag_len = SOLVEstruct->diag_len; SOLVEstruct1->gsmv_comm = SOLVEstruct->gsmv_comm; SOLVEstruct1->A_colind_gsmv = SOLVEstruct->A_colind_gsmv; /* Initialize the *gstrs_comm for 1 RHS. 
*/ if ( !(SOLVEstruct1->gstrs_comm = (pxgstrs_comm_t *) SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) ) ABORT("Malloc fails for gstrs_comm[]"); pxgstrs_init(n, m_loc, 1, fst_row, perm_r, perm_c, grid, Glu_persist, SOLVEstruct1); } pzgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid, B, ldb, X, ldx, nrhs, SOLVEstruct1, berr, stat, info); /* Deallocate the storage associated with SOLVEstruct1 */ if ( nrhs > 1 ) { pxgstrs_finalize(SOLVEstruct1->gstrs_comm); SUPERLU_FREE(SOLVEstruct1); } stat->utime[REFINE] = SuperLU_timer_() - t; } /* end if IterRefine */ /* Permute the solution matrix B <= Pc'*X. */ pzPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc, SOLVEstruct->inv_perm_c, X, ldx, B, ldb, nrhs, grid); #if ( DEBUGlevel>=2 ) printf("\n (%d) .. After pzPermute_Dense_Matrix(): b =\n", iam); for (i = 0; i < m_loc; ++i) printf("\t(%d)\t%4d\t%.10f\n", iam, i+fst_row, B[i]); #endif /* Transform the solution matrix X to a solution of the original system before equilibration. */ if ( notran ) { if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { irow = fst_row; for (i = 0; i < m_loc; ++i) { zd_mult(&b_col[i], &b_col[i], C[irow]); ++irow; } b_col += ldb; } } } else if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { irow = fst_row; for (i = 0; i < m_loc; ++i) { zd_mult(&b_col[i], &b_col[i], R[irow]); ++irow; } b_col += ldb; } } SUPERLU_FREE(b_work); SUPERLU_FREE(X); } /* end if nrhs != 0 && *info == 0 */ #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. DiagScale = %d\n", ScalePermstruct->DiagScale); #endif /* Deallocate R and/or C if it was not used. 
*/ if ( Equil && Fact != SamePattern_SameRowPerm ) { switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: SUPERLU_FREE(R); SUPERLU_FREE(C); break; case ROW: SUPERLU_FREE(C); break; case COL: SUPERLU_FREE(R); break; } } #if 0 if ( !factored && Fact != SamePattern_SameRowPerm && !parSymbFact) Destroy_CompCol_Permuted_dist(&GAC); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgssvx()"); #endif } SuperLU_DIST_5.3.0/SRC/pzgstrf.c0000644013363400111340000022402013233431301015126 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs LU factorization in parallel * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 * Modified:
 *     September 1, 1999
 *     February 7, 2001  use MPI_Isend/MPI_Irecv
 *     October 15, 2008  latency-reducing panel factorization
 *     July    12, 2011  static scheduling and arbitrary look-ahead
 *     March   13, 2013  change NTAGS to MPI_TAG_UB value
 *     September 24, 2015 replace xLAMCH by xMACH, using C99 standard.
 *     December 31, 2015 rename xMACH to xMACH_DIST.
 *     September 30, 2017 optimization for Intel Knights Landing (KNL) node .
 *
 * Sketch of the algorithm 
 *
 * ======================= 
 *    
 * The following relations hold:
 *     * A_kk = L_kk * U_kk
 *     * L_ik = A_ik * U_kk^(-1)
 *     * U_kj = L_kk^(-1) * A_kj
 *
 *              ----------------------------------
 *              |   |                            |
 *              ----|-----------------------------
 *              |   | \ U_kk|                    |
 *              |   |   \   |        U_kj        |
 *              |   |L_kk \ |         ||         |
 *              ----|-------|---------||----------
 *              |   |       |         \/         |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   | L_ik ==>       A_ij        |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              ----------------------------------
 *
 * Handle the first block of columns separately.
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity. ( pzgstrf2(0), one column at a time )
 *     * Compute block row of U
 *     * Update trailing matrix
 *
 * Loop over the remaining blocks of columns.
 *   mycol = MYCOL( iam, grid );
 *   myrow = MYROW( iam, grid );
 *   N = nsupers;
 *   For (k = 1; k < N; ++k) {
 *       krow = PROW( k, grid );
 *       kcol = PCOL( k, grid );
 *       Pkk = PNUM( krow, kcol, grid );
 *
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity.
 *       if ( mycol == kcol ) {
 *           pzgstrf2(k), one column at a time
 *       }
 *
 *     * Parallel triangular solve
 *       if ( iam == Pkk ) multicast L_k,k to this process row;
 *       if ( myrow == krow && mycol != kcol ) {
 *          Recv L_k,k from process Pkk;
 *          for (j = k+1; j < N; ++j)
 *              if ( PCOL( j, grid ) == mycol && A_k,j != 0 )
 *                 U_k,j = L_k,k \ A_k,j;
 *       }
 *
 *     * Parallel rank-k update
 *       if ( myrow == krow ) multicast U_k,k+1:N to this process column;
 *       if ( mycol == kcol ) multicast L_k+1:N,k to this process row;
 *       if ( myrow != krow ) {
 *          Pkj = PNUM( krow, mycol, grid );
 *          Recv U_k,k+1:N from process Pkj;
 *       }
 *       if ( mycol != kcol ) {
 *          Pik = PNUM( myrow, kcol, grid );
 *          Recv L_k+1:N,k from process Pik;
 *       }
 *       for (j = k+1; j < N; ++j) {
 *          for (i = k+1; i < N; ++i)
 *              if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid )
 *                   && L_i,k != 0 && U_k,j != 0 )
 *                 A_i,j = A_i,j - L_i,k * U_k,j;
 *       }
 *  }
 *
 * 
*/ #include /*#include "mkl.h"*/ #include "superlu_zdefs.h" #ifdef GPU_ACC #include "cublas_utils.h" /*#include "cublas_zgemm.h"*/ // #define NUM_CUDA_STREAMS 16 // #define NUM_CUDA_STREAMS 16 #endif /* Various defininations */ /* Name : SUPERNODE_PROFILE Purpose : For SuperNode Level profiling of various measurements such as gigaflop/sec obtained,bandwidth achieved: Overhead : Low */ // #define SUPERNODE_PROFILE /* Name : BAELINE Purpose : baseline to compare performance against Overhead : NA : this won't be used for running experiments */ // #define BASELINE /* Name : PHI_FRAMEWORK Purpose : To simulate and test algorithm used for offloading Phi Overhead : NA : this won't be used for running experiments */ #define PHI_FRAMEWORK #if 0 #define CACHELINE 64 /* bytes, Xeon Phi KNL */ #else #define CACHELINE 0 /* not worry about false sharing of different threads */ #endif //#define GEMM_PADLEN 1 #define GEMM_PADLEN 8 #define PZGSTRF2 pzgstrf2_trsm #define PZGSTRS2 pzgstrs2_omp extern void PZGSTRF2 (superlu_dist_options_t *, int_t, int_t, double, Glu_persist_t *, gridinfo_t *, LocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *); #ifdef _CRAY extern void PZGSTRS2 (int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, _fcd, _fcd, _fcd); #else extern void PZGSTRS2 (int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *); #endif #ifdef ISORT extern void isort (int_t N, int_t * ARRAY1, int_t * ARRAY2); extern void isort1 (int_t N, int_t * ARRAY); #else int superlu_sort_perm (const void *arg1, const void *arg2) { const int_t *val1 = (const int_t *) arg1; const int_t *val2 = (const int_t *) arg2; return (*val2 < *val1); } #endif /************************************************************************/ #include "zscatter.c" /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *
 * PZGSTRF performs the LU factorization in parallel.
 *
 * Arguments
 * =========
 *
 * options (input) superlu_dist_options_t*
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following field should be defined:
 *         o ReplaceTinyPivot (yes_no_t)
 *           Specifies whether to replace the tiny diagonals by
 *           sqrt(epsilon)*norm(A) during LU factorization.
 *
 * m      (input) int
 *        Number of rows in the matrix.
 *
 * n      (input) int
 *        Number of columns in the matrix.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         The following fields should be defined:
 *
 *         o Glu_persist (input) Glu_persist_t*
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *         xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (input/output) LocalLU_t*
 *           The distributed data structures to store L and U factors.
 *           See superlu_zdefs.h for the definition of 'LocalLU_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ int_t pzgstrf(superlu_dist_options_t * options, int m, int n, double anorm, LUstruct_t * LUstruct, gridinfo_t * grid, SuperLUStat_t * stat, int *info) { #ifdef _CRAY _fcd ftcs = _cptofcd ("N", strlen ("N")); _fcd ftcs1 = _cptofcd ("L", strlen ("L")); _fcd ftcs2 = _cptofcd ("N", strlen ("N")); _fcd ftcs3 = _cptofcd ("U", strlen ("U")); #endif doublecomplex zero = {0.0, 0.0}; doublecomplex alpha = {1.0, 0.0}, beta = {0.0, 0.0}; int_t *xsup; int_t *lsub, *lsub1, *usub, *Usub_buf; int_t **Lsub_buf_2, **Usub_buf_2; doublecomplex **Lval_buf_2, **Uval_buf_2; /* pointers to starts of bufs */ doublecomplex *lusup, *lusup1, *uval, *Uval_buf; /* pointer to current buf */ int_t fnz, i, ib, ijb, ilst, it, iukp, jb, jj, klst, knsupc, lb, lib, ldv, ljb, lptr, lptr0, lptrj, luptr, luptr0, luptrj, nlb, nub, nsupc, rel, rukp, il, iu; int_t Pc, Pr; int iam, kcol, krow, yourcol, mycol, myrow, pi, pj; int j, k, lk, nsupers; /* k - current panel to work on */ int k0; /* counter of the next supernode to be factored */ int kk, kk0, kk1, kk2, jj0; /* panels in the look-ahead window */ int iukp0, rukp0, flag0, flag1; int nsupr, nbrow, segsize; int msg0, msg2; int_t **Ufstnz_br_ptr, **Lrowind_bc_ptr; doublecomplex **Unzval_br_ptr, **Lnzval_bc_ptr; int_t *index; doublecomplex *nzval; int_t *iuip, *ruip; /* Pointers to U index/nzval; size ceil(NSUPERS/Pr). 
*/ doublecomplex *ucol; int *indirect, *indirect2; int_t *tempi; doublecomplex *tempu, *tempv, *tempr; /* doublecomplex *tempv2d, *tempU2d; Sherry */ int iinfo; int *ToRecv, *ToSendD, **ToSendR; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; superlu_scope_t *scp; float s_eps; double thresh; /*int full;*/ int ldt, ldu, lead_zero, ncols, ncb, nrb, p, pr, pc, nblocks; int_t *etree_supno_l, *etree_supno, *blocks, *blockr, *Ublock, *Urows, *Lblock, *Lrows, *perm_u, *sf_block, *sf_block_l, *nnodes_l, *nnodes_u, *edag_supno_l, *recvbuf, **edag_supno; float edag_supno_l_bytes; #ifdef ISORT int_t *iperm_u; #endif int *msgcnt; /* Count the size of the message xfer'd in each buffer: * 0 : transferred in Lsub_buf[] * 1 : transferred in Lval_buf[] * 2 : transferred in Usub_buf[] * 3 : transferred in Uval_buf[] */ int **msgcnts, **msgcntsU; /* counts in the look-ahead window */ int *factored; /* factored[j] == 0 : L col panel j is factorized. */ int *factoredU; /* factoredU[i] == 1 : U row panel i is factorized. 
*/ int nnodes, *sendcnts, *sdispls, *recvcnts, *rdispls, *srows, *rrows; etree_node *head, *tail, *ptr; int *num_child; int num_look_aheads, look_id, *look_ahead; int_t *perm_c_supno, *iperm_c_supno; MPI_Request *recv_req, **recv_reqs, **send_reqs, **send_reqs_u, **recv_reqs_u; MPI_Request *send_req, *U_diag_blk_send_req = NULL; MPI_Status status; void *attr_val; int flag; /* The following variables are used to pad GEMM dimensions so that each is a multiple of vector length (8 doubles for KNL) */ int gemm_m_pad = GEMM_PADLEN, gemm_k_pad = GEMM_PADLEN, gemm_n_pad = GEMM_PADLEN; int gemm_padding = 0; int iword = sizeof (int_t); int dword = sizeof (doublecomplex); /* For measuring load imbalence in omp threads */ double omp_load_imblc = 0.0; double *omp_loop_time; double schur_flop_timer = 0.0; double pdgstrf2_timer = 0.0; double pdgstrs2_timer = 0.0; double lookaheadupdatetimer = 0.0; double InitTimer = 0.0; /* including compute schedule, malloc */ double tt_start, tt_end; /* #if !defined( GPU_ACC ) */ /* Counters for memory operations and timings */ double scatter_mem_op_counter = 0.0; double scatter_mem_op_timer = 0.0; double scatterL_mem_op_counter = 0.0; double scatterL_mem_op_timer = 0.0; double scatterU_mem_op_counter = 0.0; double scatterU_mem_op_timer = 0.0; /* Counters for flops/gather/scatter and timings */ double GatherLTimer = 0.0; double LookAheadRowSepMOP = 0.0; double GatherUTimer = 0.0; double GatherMOP = 0.0; double LookAheadGEMMTimer = 0.0; double LookAheadGEMMFlOp = 0.0; double LookAheadScatterTimer = 0.0; double LookAheadScatterMOP = 0.0; double RemainGEMMTimer = 0.0; double RemainGEMM_flops = 0.0; double RemainScatterTimer = 0.0; double NetSchurUpTimer = 0.0; double schur_flop_counter = 0.0; /* #endif */ #if ( PRNTlevel>= 1) /* count GEMM max dimensions */ int gemm_max_m = 0, gemm_max_n = 0, gemm_max_k = 0; #endif #if ( DEBUGlevel>=2 ) int_t num_copy = 0, num_update = 0; #endif #if ( PRNTlevel==3 ) int zero_msg = 0, total_msg = 0; #endif #if ( 
PROFlevel>=1 ) double t1, t2; float msg_vol = 0, msg_cnt = 0; double comm_wait_time = 0.0; /* Record GEMM dimensions and times */ FILE *fopen(), *fgemm; int gemm_count = 0; typedef struct { int m, n, k; double microseconds; } gemm_profile; gemm_profile *gemm_stats; #endif /* Test the input parameters. */ *info = 0; if (m < 0) *info = -2; else if (n < 0) *info = -3; if (*info) { pxerr_dist ("pzgstrf", grid, -*info); return (-1); } /* Quick return if possible. */ if (m == 0 || n == 0) return 0; double tt1 = SuperLU_timer_ (); /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW (iam, grid); mycol = MYCOL (iam, grid); nsupers = Glu_persist->supno[n - 1] + 1; xsup = Glu_persist->xsup; s_eps = smach_dist("Epsilon"); thresh = s_eps * anorm; MPI_Attr_get (MPI_COMM_WORLD, MPI_TAG_UB, &attr_val, &flag); if (!flag) { fprintf (stderr, "Could not get TAG_UB\n"); return (-1); } int tag_ub = *(int *) attr_val; #if ( PRNTlevel>=1 ) if (!iam) { printf ("MPI tag upper bound = %d\n", tag_ub); fflush(stdout); } #endif #if ( DEBUGlevel>=1 ) if (s_eps == 0.0) printf (" ***** warning s_eps = %e *****\n", s_eps); CHECK_MALLOC (iam, "Enter pdgstrf()"); #endif #if (PROFlevel >= 1 ) gemm_stats = (gemm_profile *) SUPERLU_MALLOC(nsupers * sizeof(gemm_profile)); if (iam == 0) fgemm = fopen("dgemm_mnk.dat", "w"); int *prof_sendR = intCalloc_dist(nsupers); #endif stat->ops[FACT] = 0.0; stat->current_buffer = 0.0; stat->peak_buffer = 0.0; stat->gpu_buffer = 0.0; /* make sure the range of look-ahead window [0, MAX_LOOKAHEADS-1] */ num_look_aheads = SUPERLU_MAX(0, SUPERLU_MIN(options->num_lookaheads, MAX_LOOKAHEADS - 1)); if (Pr * Pc > 1) { if (!(U_diag_blk_send_req = (MPI_Request *) SUPERLU_MALLOC (Pr * sizeof (MPI_Request)))) ABORT ("Malloc fails for U_diag_blk_send_req[]."); /* flag no outstanding Isend */ U_diag_blk_send_req[myrow] = MPI_REQUEST_NULL; /* used 0 before */ /* allocating buffers for look-ahead */ i = Llu->bufmax[0]; if (i != 0) { if ( 
!(Llu->Lsub_buf_2[0] = intMalloc_dist ((num_look_aheads + 1) * ((size_t) i))) ) ABORT ("Malloc fails for Lsub_buf."); tempi = Llu->Lsub_buf_2[0]; for (jj = 0; jj < num_look_aheads; jj++) Llu->Lsub_buf_2[jj+1] = tempi + i*(jj+1); /* vectorize */ //Llu->Lsub_buf_2[jj + 1] = Llu->Lsub_buf_2[jj] + i; } i = Llu->bufmax[1]; if (i != 0) { if (!(Llu->Lval_buf_2[0] = doublecomplexMalloc_dist ((num_look_aheads + 1) * ((size_t) i)))) ABORT ("Malloc fails for Lval_buf[]."); tempr = Llu->Lval_buf_2[0]; for (jj = 0; jj < num_look_aheads; jj++) Llu->Lval_buf_2[jj+1] = tempr + i*(jj+1); /* vectorize */ //Llu->Lval_buf_2[jj + 1] = Llu->Lval_buf_2[jj] + i; } i = Llu->bufmax[2]; if (i != 0) { if (!(Llu->Usub_buf_2[0] = intMalloc_dist ((num_look_aheads + 1) * i))) ABORT ("Malloc fails for Usub_buf_2[]."); tempi = Llu->Usub_buf_2[0]; for (jj = 0; jj < num_look_aheads; jj++) Llu->Usub_buf_2[jj+1] = tempi + i*(jj+1); /* vectorize */ //Llu->Usub_buf_2[jj + 1] = Llu->Usub_buf_2[jj] + i; } i = Llu->bufmax[3]; if (i != 0) { if (!(Llu->Uval_buf_2[0] = doublecomplexMalloc_dist ((num_look_aheads + 1) * i))) ABORT ("Malloc fails for Uval_buf_2[]."); tempr = Llu->Uval_buf_2[0]; for (jj = 0; jj < num_look_aheads; jj++) Llu->Uval_buf_2[jj+1] = tempr + i*(jj+1); /* vectorize */ //Llu->Uval_buf_2[jj + 1] = Llu->Uval_buf_2[jj] + i; } } log_memory( (Llu->bufmax[0] + Llu->bufmax[2]) * (num_look_aheads + 1) * iword + (Llu->bufmax[1] + Llu->bufmax[3]) * (num_look_aheads + 1) * dword, stat ); /* creating pointers to the look-ahead buffers */ if (! (Lsub_buf_2 = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (int_t *)))) ABORT ("Malloc fails for Lsub_buf_2[]."); if (! (Lval_buf_2 = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (doublecomplex *)))) ABORT ("Malloc fails for Lval_buf_2[]."); if (! (Usub_buf_2 = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (int_t *)))) ABORT ("Malloc fails for Uval_buf_2[]."); if (! 
(Uval_buf_2 = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (doublecomplex *)))) ABORT ("Malloc fails for buf_2[]."); for (i = 0; i <= num_look_aheads; i++) { Lval_buf_2[i] = Llu->Lval_buf_2[i]; Lsub_buf_2[i] = Llu->Lsub_buf_2[i]; Uval_buf_2[i] = Llu->Uval_buf_2[i]; Usub_buf_2[i] = Llu->Usub_buf_2[i]; } if (!(msgcnts = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (int *)))) ABORT ("Malloc fails for msgcnts[]."); if (!(msgcntsU = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (int *)))) ABORT ("Malloc fails for msgcntsU[]."); for (i = 0; i <= num_look_aheads; i++) { if (!(msgcnts[i] = SUPERLU_MALLOC (4 * sizeof (int)))) ABORT ("Malloc fails for msgcnts[]."); if (!(msgcntsU[i] = SUPERLU_MALLOC (4 * sizeof (int)))) ABORT ("Malloc fails for msgcntsU[]."); } if (! (recv_reqs_u = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (MPI_Request *)))) ABORT ("Malloc fails for recv_reqs_u[]."); if (! (send_reqs_u = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (MPI_Request *)))) ABORT ("Malloc fails for send_reqs_u[]."); if (! (send_reqs = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (MPI_Request *)))) ABORT ("Malloc fails for send_reqs_u[]."); if (! 
(recv_reqs = SUPERLU_MALLOC ((1 + num_look_aheads) * sizeof (MPI_Request *)))) ABORT ("Malloc fails for recv_reqs[]."); for (i = 0; i <= num_look_aheads; i++) { if (!(recv_reqs_u[i] = (MPI_Request *) SUPERLU_MALLOC (2 * sizeof (MPI_Request)))) ABORT ("Malloc fails for recv_req_u[i]."); if (!(send_reqs_u[i] = (MPI_Request *) SUPERLU_MALLOC (2 * Pr * sizeof (MPI_Request)))) ABORT ("Malloc fails for send_req_u[i]."); if (!(send_reqs[i] = (MPI_Request *) SUPERLU_MALLOC (2 * Pc * sizeof (MPI_Request)))) ABORT ("Malloc fails for send_reqs[i]."); if (!(recv_reqs[i] = (MPI_Request *) SUPERLU_MALLOC (4 * sizeof (MPI_Request)))) ABORT ("Malloc fails for recv_req[]."); send_reqs[i][0] = send_reqs[i][1] = MPI_REQUEST_NULL; recv_reqs[i][0] = recv_reqs[i][1] = MPI_REQUEST_NULL; } if (!(factored = SUPERLU_MALLOC (nsupers * sizeof (int_t)))) ABORT ("Malloc fails for factored[]."); if (!(factoredU = SUPERLU_MALLOC (nsupers * sizeof (int_t)))) ABORT ("Malloc fails for factoredU[]."); for (i = 0; i < nsupers; i++) factored[i] = factoredU[i] = -1; log_memory(2 * nsupers * iword, stat); int num_threads = 1; #ifdef _OPENMP #pragma omp parallel default(shared) #pragma omp master { //if (omp_get_thread_num () == 0) num_threads = omp_get_num_threads (); } #endif #if 0 omp_loop_time = (double *) _mm_malloc (sizeof (double) * num_threads,64); #else omp_loop_time = (double *) doubleMalloc_dist(num_threads); #endif #if ( PRNTlevel>=1 ) if(!iam) { printf(".. 
Starting with %d OpenMP threads \n", num_threads ); fflush(stdout); } #endif nblocks = 0; ncb = nsupers / Pc; /* number of column blocks, horizontal */ nrb = nsupers / Pr; /* number of row blocks, vertical */ /* in order to have dynamic scheduling */ int *full_u_cols; int *blk_ldu; #if 0 full_u_cols = (int_t *) _mm_malloc (sizeof (int_t) * ncb,64); blk_ldu = (int_t *) _mm_malloc (sizeof (int_t) * ncb,64); #else full_u_cols = SUPERLU_MALLOC(ncb * sizeof(int)); blk_ldu = SUPERLU_MALLOC(ncb * sizeof(int)); #endif log_memory(2 * ncb * iword, stat); #if 0 /* Sherry: not used? */ /* This bunch is used for static scheduling */ pair *full_col_count = (pair *) _mm_malloc (sizeof (pair) * ncb,64); int_t *count_cols, *sum_cols, *partition; count_cols = (int_t *) _mm_malloc (sizeof (int_t) * num_threads,64); sum_cols = (int_t *) _mm_malloc (sizeof (int_t) * num_threads,64); partition = (int_t *) _mm_malloc (sizeof (int_t) * num_threads * ncb,64); int_t ldp = ncb; #endif /* ################################################################## * Compute a good static schedule based on the factorization task graph. * ################################################################## */ perm_c_supno = SUPERLU_MALLOC (2 * nsupers * sizeof (int_t)); iperm_c_supno = perm_c_supno + nsupers; static_schedule(options, m, n, LUstruct, grid, stat, perm_c_supno, iperm_c_supno, info); #if ( DEBUGlevel >= 2 ) PrintInt10("schedule:perm_c_supno", nsupers, perm_c_supno); /* Turn off static schedule */ printf("[%d] .. 
Turn off static schedule for debugging ..\n", iam); for (i = 0; i < nsupers; ++i) perm_c_supno[i] = iperm_c_supno[i] = i; #endif /* ################################################################## */ /* constructing look-ahead table to indicate the last dependency */ int *look_ahead_l; /* Sherry: add comment on look_ahead_l[] */ stat->num_look_aheads = num_look_aheads; look_ahead_l = SUPERLU_MALLOC (nsupers * sizeof (int)); look_ahead = SUPERLU_MALLOC (nsupers * sizeof (int)); for (lb = 0; lb < nsupers; lb++) look_ahead_l[lb] = -1; /* vectorized */ log_memory(3 * nsupers * iword, stat); /* Sherry: omp parallel? not worth doing, due to concurrent write to look_ahead_l[jb] */ for (lb = 0; lb < nrb; ++lb) { /* go through U-factor */ ib = lb * Pr + myrow; index = Llu->Ufstnz_br_ptr[lb]; if (index) { /* Not an empty row */ k = BR_HEADER; for (j = 0; j < index[0]; ++j) { jb = index[k]; /* global block number */ if (jb != ib) look_ahead_l[jb] = SUPERLU_MAX (iperm_c_supno[ib], look_ahead_l[jb]); k += UB_DESCRIPTOR + SuperSize (index[k]); } } } if (myrow < nsupers % grid->nprow) { /* leftover block rows */ ib = nrb * Pr + myrow; index = Llu->Ufstnz_br_ptr[nrb]; if (index) { /* Not an empty row */ k = BR_HEADER; for (j = 0; j < index[0]; ++j) { jb = index[k]; if (jb != ib) look_ahead_l[jb] = SUPERLU_MAX (iperm_c_supno[ib], look_ahead_l[jb]); k += UB_DESCRIPTOR + SuperSize (index[k]); } } } if (options->SymPattern == NO) { /* Sherry: omp parallel? 
not worth doing, due to concurrent write to look_ahead_l[jb] */ for (lb = 0; lb < ncb; lb++) { /* go through L-factor */ ib = lb * Pc + mycol; index = Llu->Lrowind_bc_ptr[lb]; if (index) { k = BC_HEADER; for (j = 0; j < index[0]; j++) { jb = index[k]; if (jb != ib) look_ahead_l[jb] = SUPERLU_MAX (iperm_c_supno[ib], look_ahead_l[jb]); k += LB_DESCRIPTOR + index[k + 1]; } } } if (mycol < nsupers % grid->npcol) { /* leftover block columns */ ib = ncb * Pc + mycol; index = Llu->Lrowind_bc_ptr[ncb]; if (index) { k = BC_HEADER; for (j = 0; j < index[0]; j++) { jb = index[k]; if (jb != ib) look_ahead_l[jb] = SUPERLU_MAX (iperm_c_supno[ib], look_ahead_l[jb]); k += LB_DESCRIPTOR + index[k + 1]; } } } } MPI_Allreduce (look_ahead_l, look_ahead, nsupers, MPI_INT, MPI_MAX, grid->comm); SUPERLU_FREE (look_ahead_l); #ifdef ISORT iperm_u = SUPERLU_MALLOC (nsupers * sizeof (int_t)); perm_u = SUPERLU_MALLOC (nsupers * sizeof (int_t)); #else perm_u = SUPERLU_MALLOC (2 * nsupers * sizeof (int_t)); #endif log_memory(nsupers * iword, stat); k = sp_ienv_dist (3); /* max supernode size */ #if 0 if ( !(Llu->ujrow = doubleMalloc_dist(k*(k+1)/2)) ) ABORT("Malloc fails for ujrow[]."); #else /* Instead of half storage, we'll do full storage */ if (!(Llu->ujrow = doublecomplexCalloc_dist (k * k))) ABORT ("Malloc fails for ujrow[]."); #endif log_memory(k * k * iword, stat); #if ( PRNTlevel>=1 ) if (!iam) { printf (".. thresh = s_eps %e * anorm %e = %e\n", s_eps, anorm, thresh); printf (".. 
Buffer size: Lsub %ld\tLval %ld\tUsub %ld\tUval %ld\tLDA %ld\n", (long int) Llu->bufmax[0], (long int) Llu->bufmax[1], (long int) Llu->bufmax[2], (long int) Llu->bufmax[3], (long int) Llu->bufmax[4]); fflush(stdout); } #endif Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; ToRecv = Llu->ToRecv; ToSendD = Llu->ToSendD; ToSendR = Llu->ToSendR; ldt = sp_ienv_dist (3); /* Size of maximum supernode */ k = CEILING (nsupers, Pr); /* Number of local block rows */ /* Following code is for finding maximum row dimension of all L panels */ int local_max_row_size = 0; int max_row_size; #if 0 #if defined _OPENMP // Sherry: parallel reduction -- seems slower? #pragma omp parallel for reduction(max :local_max_row_size) private(lk,lsub) #endif #endif for (int i = mycol; i < nsupers; i += Pc) { /* grab my local columns */ //int tpc = PCOL (i, grid); lk = LBj (i, grid); lsub = Lrowind_bc_ptr[lk]; if (lsub != NULL) { if (lsub[1] > local_max_row_size) local_max_row_size = lsub[1]; } } /* Max row size is global reduction within a row */ MPI_Allreduce (&local_max_row_size, &max_row_size, 1, MPI_INT, MPI_MAX, (grid->rscp.comm)); /* Buffer size is max of look-ahead window */ /* int_t buffer_size = SUPERLU_MAX (max_row_size * num_threads * ldt, get_max_buffer_size ()); */ #ifdef GPU_ACC int cublas_nb = get_cublas_nb(); int nstreams = get_num_cuda_streams (); int buffer_size = SUPERLU_MAX(max_row_size*nstreams*cublas_nb,get_max_buffer_size()); /* array holding last column blk for each partition, used in SchCompUdt--CUDA.c */ #if 0 int *stream_end_col = (int_t *) _mm_malloc (sizeof (int_t) * nstreams,64); #else int *stream_end_col = SUPERLU_MALLOC( nstreams * sizeof(int) ); #endif #else /* not to use GPU */ int Threads_per_process = get_thread_per_process(); int buffer_size = SUPERLU_MAX(max_row_size*Threads_per_process*ldt,get_max_buffer_size()); #endif /* end ifdef GPU_ACC */ #if 0 /* symmetric 
assumption -- using L's supernode to estimate. */ /* Note that in following expression 8 can be anything as long as its not too big */ int bigu_size = 8 * sp_ienv_dist (3) * (max_row_size); #else int_t bigu_size = estimate_bigu_size( nsupers, ldt, Ufstnz_br_ptr, Glu_persist, grid, perm_u ); #endif /* +16 to avoid cache line false sharing */ int_t bigv_size = SUPERLU_MAX(max_row_size * (bigu_size / ldt), (ldt*ldt + CACHELINE / dword) * num_threads); /* bigU and bigV are either on CPU or on GPU, not both. */ doublecomplex* bigU; /* for storing entire U(k,:) panel, prepare for GEMM. bigU has the same size either on CPU or on CPU. */ doublecomplex* bigV; /* for storing GEMM output matrix, i.e. update matrix. bigV is large to hold the aggregate GEMM output.*/ #if ( PRNTlevel>=1 ) if(!iam) { printf("max_nrows in L panel %d\n", max_row_size); printf("\t.. GEMM buffer size: max_nrows X max_ncols = %d x %d\n", max_row_size, (bigu_size / ldt)); printf(".. BIG U size %d\t BIG V size %d\n", bigu_size, bigv_size); fflush(stdout); } #endif #ifdef GPU_ACC if ( checkCuda(cudaHostAlloc((void**)&bigU, bigu_size * sizeof(doublecomplex), cudaHostAllocDefault)) ) ABORT("Malloc fails for zgemm buffer U "); bigv_size = buffer_size; #if ( PRNTlevel>=1 ) if (!iam) printf("[%d] .. 
BIG V bigv_size %d, using buffer_size %d (on GPU)\n", iam, bigv_size, buffer_size); #endif if ( checkCuda(cudaHostAlloc((void**)&bigV, bigv_size * sizeof(doublecomplex) ,cudaHostAllocDefault)) ) ABORT("Malloc fails for zgemm buffer V"); DisplayHeader(); #if ( PRNTlevel>=1 ) printf(" Starting with %d Cuda Streams \n",nstreams ); #endif cublasHandle_t *handle; handle = (cublasHandle_t *) SUPERLU_MALLOC(sizeof(cublasHandle_t)*nstreams); for(int i = 0; i < nstreams; i++) handle[i] = create_handle(); // creating streams cudaStream_t *streams; streams = (cudaStream_t *) SUPERLU_MALLOC(sizeof(cudaStream_t)*nstreams); for (int i = 0; i < nstreams; ++i) checkCuda( cudaStreamCreate(&streams[i]) ); // allocating data in device doublecomplex *dA, *dB, *dC; cudaError_t cudaStat; #if 0 // cudaStat = cudaMalloc( (void**)&dA, m*k*sizeof(double)); // HOw much should be the size of dA? // for time being just making it // cudaStat = cudaMalloc( (void**)&dA, ((max_row_size*sp_ienv_dist(3)))* sizeof(double)); #endif cudaStat = cudaMalloc( (void**)&dA, max_row_size*sp_ienv_dist(3)* sizeof(doublecomplex)); if (cudaStat!= cudaSuccess) { fprintf(stderr, "!!!! Error in allocating A in the device %ld \n",m*k*sizeof(doublecomplex) ); return 1; } // size of B should be max_supernode_size*buffer cudaStat = cudaMalloc((void**)&dB, bigu_size * sizeof(doublecomplex)); if (cudaStat!= cudaSuccess) { fprintf(stderr, "!!!! Error in allocating B in the device %ld \n",n*k*sizeof(doublecomplex)); return 1; } cudaStat = cudaMalloc((void**)&dC, buffer_size* sizeof(doublecomplex) ); if (cudaStat!= cudaSuccess) { fprintf(stderr, "!!!! 
Error in allocating C in the device \n" ); return 1; } stat->gpu_buffer += ( max_row_size * sp_ienv_dist(3) + bigu_size + buffer_size ) * dword; #else /* not CUDA */ // for GEMM padding 0 j = bigu_size / ldt; bigu_size += (gemm_k_pad * (j + ldt + gemm_n_pad)); bigv_size += (gemm_m_pad * (j + max_row_size + gemm_n_pad)); #ifdef __INTEL_COMPILER bigU = _mm_malloc(bigu_size * sizeof(doublecomplex), 1<<12); // align at 4K page bigV = _mm_malloc(bigv_size * sizeof(doublecomplex), 1<<12); #else if ( !(bigU = doublecomplexMalloc_dist(bigu_size)) ) ABORT ("Malloc fails for zgemm U buffer"); //Maximum size of bigU= sqrt(buffsize) ? // int bigv_size = 8 * ldt * ldt * num_threads; if ( !(bigV = doublecomplexMalloc_dist(bigv_size)) ) ABORT ("Malloc failed for zgemm V buffer"); #endif #endif /* end ifdef GPU_ACC */ log_memory((bigv_size + bigu_size) * dword, stat); // mlock(bigU,(bigu_size) * sizeof (double)); #if ( PRNTlevel>=1 ) if(!iam) { printf (" Max row size is %d \n", max_row_size); printf (" Threads per process %d \n", num_threads); fflush(stdout); } #endif #if 0 /* Sherry */ if (!(tempv2d = doublecomplexCalloc_dist (2 * ((size_t) ldt) * ldt))) ABORT ("Calloc fails for tempv2d[]."); tempU2d = tempv2d + ldt * ldt; #endif /* Sherry: (ldt + 16), avoid cache line false sharing. 
KNL cacheline size = 64 bytes = 16 int */ iinfo = ldt + CACHELINE / sizeof(int); if (!(indirect = SUPERLU_MALLOC (iinfo * num_threads * sizeof(int)))) ABORT ("Malloc fails for indirect[]."); if (!(indirect2 = SUPERLU_MALLOC (iinfo * num_threads * sizeof(int)))) ABORT ("Malloc fails for indirect[]."); if (!(iuip = intMalloc_dist (k))) ABORT ("Malloc fails for iuip[]."); if (!(ruip = intMalloc_dist (k))) ABORT ("Malloc fails for ruip[]."); log_memory(2 * ldt*ldt * dword + 2 * iinfo * num_threads * iword + 2 * k * iword, stat); int_t *lookAheadFullRow,*lookAheadStRow,*lookAhead_lptr,*lookAhead_ib, *RemainFullRow,*RemainStRow,*Remain_lptr,*Remain_ib; lookAheadFullRow = intMalloc_dist( (num_look_aheads+1) ); lookAheadStRow = intMalloc_dist( (num_look_aheads+1) ); lookAhead_lptr = intMalloc_dist( (num_look_aheads+1) ); lookAhead_ib = intMalloc_dist( (num_look_aheads+1) ); int_t mrb= (nsupers+Pr-1) / Pr; int_t mcb= (nsupers+Pc-1) / Pc; RemainFullRow = intMalloc_dist(mrb); RemainStRow = intMalloc_dist(mrb); #if 0 Remain_lptr = (int *) _mm_malloc(sizeof(int)*mrb,1); #else Remain_lptr = intMalloc_dist(mrb); #endif // mlock(Remain_lptr, sizeof(int)*mrb ); Remain_ib = intMalloc_dist(mrb); Remain_info_t *Remain_info; #if 0 Remain_info = (Remain_info_t *) _mm_malloc(mrb*sizeof(Remain_info_t),64); #else Remain_info = (Remain_info_t *) SUPERLU_MALLOC(mrb*sizeof(Remain_info_t)); #endif doublecomplex *lookAhead_L_buff, *Remain_L_buff; /* Stores entire L-panel */ Ublock_info_t *Ublock_info; ldt = sp_ienv_dist (3); /* max supernode size */ /* The following is quite loose */ lookAhead_L_buff = doublecomplexMalloc_dist(ldt*ldt* (num_look_aheads+1) ); #if 0 Remain_L_buff = (doublecomplex *) _mm_malloc( sizeof(doublecomplex)*(Llu->bufmax[1]),64); Ublock_info = (Ublock_info_t *) _mm_malloc(mcb*sizeof(Ublock_info_t),64); int * Ublock_info_iukp = (int *) _mm_malloc(mcb*sizeof(int),64); int * Ublock_info_rukp = (int *) _mm_malloc(mcb*sizeof(int),64); int * Ublock_info_jb = (int *) 
_mm_malloc(mcb*sizeof(int),64); #else j = gemm_m_pad * (ldt + max_row_size + gemm_k_pad); Remain_L_buff = doublecomplexMalloc_dist(Llu->bufmax[1] + j); /* This is loose */ Ublock_info = (Ublock_info_t *) SUPERLU_MALLOC(mcb*sizeof(Ublock_info_t)); int *Ublock_info_iukp = (int *) SUPERLU_MALLOC(mcb*sizeof(int)); int *Ublock_info_rukp = (int *) SUPERLU_MALLOC(mcb*sizeof(int)); int *Ublock_info_jb = (int *) SUPERLU_MALLOC(mcb*sizeof(int)); #endif long long alloc_mem = 4 * mrb * iword + mrb * sizeof(Remain_info_t) + ldt * ldt * (num_look_aheads+1) * dword + Llu->bufmax[1] * dword ; log_memory(alloc_mem, stat); InitTimer = SuperLU_timer_() - tt1; double pxgstrfTimer = SuperLU_timer_(); /* ################################################################## ** Handle first block column separately to start the pipeline. ** ################################################################## */ look_id = 0; msgcnt = msgcnts[0]; /* Lsub[0] to be transferred */ send_req = send_reqs[0]; recv_req = recv_reqs[0]; k0 = 0; k = perm_c_supno[0]; kcol = PCOL (k, grid); krow = PROW (k, grid); if (mycol == kcol) { double ttt1 = SuperLU_timer_(); /* panel factorization */ PZGSTRF2 (options, k0, k, thresh, Glu_persist, grid, Llu, U_diag_blk_send_req, tag_ub, stat, info); pdgstrf2_timer += SuperLU_timer_()-ttt1; scp = &grid->rscp; /* The scope of process row. */ /* Multicasts numeric values of L(:,0) to process rows. */ lk = LBj (k, grid); /* Local block number. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if (lsub) { /* number of entries in Lsub_buf[] to be transferred */ msgcnt[0] = lsub[1] + BC_HEADER + lsub[0] * LB_DESCRIPTOR; /* number of entries in Lval_buf[] to be transferred */ msgcnt[1] = lsub[1] * SuperSize (k); } else { msgcnt[0] = msgcnt[1] = 0; } for (pj = 0; pj < Pc; ++pj) { if (ToSendR[lk][pj] != EMPTY) { #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Isend (lsub, msgcnt[0], mpi_int_t, pj, SLU_MPI_TAG (0, 0) /* 0 */, scp->comm, &send_req[pj]); MPI_Isend (lusup, msgcnt[1], SuperLU_MPI_DOUBLE_COMPLEX, pj, SLU_MPI_TAG (1, 0) /* 1 */, scp->comm, &send_req[pj + Pc]); #if ( DEBUGlevel>=2 ) printf ("[%d] first block cloumn Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, 0, msgcnt[0], msgcnt[1], pj); #endif #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; ++prof_sendR[lk]; msg_cnt += 2; msg_vol += msgcnt[0] * iword + msgcnt[1] * dword; #endif } /* end if */ } /* end for pj ... */ } else { /* Post immediate receives. */ if (ToRecv[k] >= 1) { /* Recv block column L(:,0). */ scp = &grid->rscp; /* The scope of process row. */ #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Lsub_buf_2[0], Llu->bufmax[0], mpi_int_t, kcol, SLU_MPI_TAG (0, 0) /* 0 */ , scp->comm, &recv_req[0]); MPI_Irecv (Lval_buf_2[0], Llu->bufmax[1], SuperLU_MPI_DOUBLE_COMPLEX, kcol, SLU_MPI_TAG (1, 0) /* 1 */ , scp->comm, &recv_req[1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif } } /* end if mycol == 0 */ factored[k] = 0; /* flag column k as factored. */ /* post receive of first U-row */ if (myrow != krow) { if (ToRecv[k] == 2) { /* Recv block row U(k,:). */ scp = &grid->cscp; /* The scope of process column. 
*/ Usub_buf = Llu->Usub_buf_2[0]; Uval_buf = Llu->Uval_buf_2[0]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Usub_buf, Llu->bufmax[2], mpi_int_t, krow, SLU_MPI_TAG (2, 0) /* 2%tag_ub */ , scp->comm, &recv_reqs_u[0][0]); MPI_Irecv (Uval_buf, Llu->bufmax[3], SuperLU_MPI_DOUBLE_COMPLEX, krow, SLU_MPI_TAG (3, 0) /* 3%tag_ub */ , scp->comm, &recv_reqs_u[0][1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; #endif } } /* ################################################################## **** MAIN LOOP **** ################################################################## */ for (k0 = 0; k0 < nsupers; ++k0) { k = perm_c_supno[k0]; /* ============================================ * * ======= look-ahead the new L columns ======= * * ============================================ */ /* tt1 = SuperLU_timer_(); */ if (k0 == 0) { /* look-ahead all the columns in the window */ kk1 = k0 + 1; kk2 = SUPERLU_MIN (k0 + num_look_aheads, nsupers - 1); } else { /* look-ahead one new column after the current window */ kk1 = k0 + num_look_aheads; kk2 = SUPERLU_MIN (kk1, nsupers - 1); } for (kk0 = kk1; kk0 <= kk2; kk0++) { /* loop through look-ahead window in L */ kk = perm_c_supno[kk0]; /* use the ordering from static schedule */ look_id = kk0 % (1 + num_look_aheads); /* which column in window */ if (look_ahead[kk] < k0) { /* does not depend on current column k */ kcol = PCOL (kk, grid); if (mycol == kcol) { /* I own this panel */ /* Panel factorization -- Factor diagonal and subdiagonal L blocks and test for exact singularity. */ factored[kk] = 0; /* flag column kk as factored */ double ttt1 = SuperLU_timer_(); PZGSTRF2 (options, kk0, kk, thresh, Glu_persist, grid, Llu, U_diag_blk_send_req, tag_ub, stat, info); pdgstrf2_timer += SuperLU_timer_() - ttt1; /* Multicasts numeric values of L(:,kk) to process rows. 
*/ /* ttt1 = SuperLU_timer_(); */ msgcnt = msgcnts[look_id]; /* point to the proper count array */ send_req = send_reqs[look_id]; lk = LBj (kk, grid); /* Local block number in L. */ lsub1 = Lrowind_bc_ptr[lk]; if (lsub1) { msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0] * LB_DESCRIPTOR; /* size of metadata */ msgcnt[1] = lsub1[1] * SuperSize (kk); /* Lval_buf[] size */ } else { msgcnt[0] = 0; msgcnt[1] = 0; } scp = &grid->rscp; /* The scope of process row. */ for (pj = 0; pj < Pc; ++pj) { if (ToSendR[lk][pj] != EMPTY) { lusup1 = Lnzval_bc_ptr[lk]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Isend (lsub1, msgcnt[0], mpi_int_t, pj, SLU_MPI_TAG (0, kk0), /* (4*kk0)%tag_ub */ scp->comm, &send_req[pj]); MPI_Isend (lusup1, msgcnt[1], SuperLU_MPI_DOUBLE_COMPLEX, pj, SLU_MPI_TAG (1, kk0), /* (4*kk0+1)%tag_ub */ scp->comm, &send_req[pj + Pc]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; ++prof_sendR[lk]; #endif #if ( DEBUGlevel>=2 ) printf ("[%d] -1- Send L(:,%4d): #lsub1 %4d, #lusup1 %4d right to Pj %2d\n", iam, kk, msgcnt[0], msgcnt[1], pj); #endif } } /* stat->time9 += SuperLU_timer_() - ttt1; */ } else { /* Post Recv of block column L(:,kk). */ /* double ttt1 = SuperLU_timer_(); */ if (ToRecv[kk] >= 1) { scp = &grid->rscp; /* The scope of process row. 
*/ recv_req = recv_reqs[look_id]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Lsub_buf_2[look_id], Llu->bufmax[0], mpi_int_t, kcol, SLU_MPI_TAG (0, kk0), /* (4*kk0)%tag_ub */ scp->comm, &recv_req[0]); MPI_Irecv (Lval_buf_2[look_id], Llu->bufmax[1], SuperLU_MPI_DOUBLE_COMPLEX, kcol, SLU_MPI_TAG (1, kk0), /* (4*kk0+1)%tag_ub */ scp->comm, &recv_req[1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif } /* stat->time10 += SuperLU_timer_() - ttt1; */ } /* end if mycol == Pc(kk) */ } /* end if look-ahead in L panels */ /* Pre-post irecv for U-row look-ahead */ krow = PROW (kk, grid); if (myrow != krow) { if (ToRecv[kk] == 2) { /* post iRecv block row U(kk,:). */ scp = &grid->cscp; /* The scope of process column. */ Usub_buf = Llu->Usub_buf_2[look_id]; Uval_buf = Llu->Uval_buf_2[look_id]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Usub_buf, Llu->bufmax[2], mpi_int_t, krow, SLU_MPI_TAG (2, kk0) /* (4*kk0+2)%tag_ub */ , scp->comm, &recv_reqs_u[look_id][0]); MPI_Irecv (Uval_buf, Llu->bufmax[3], SuperLU_MPI_DOUBLE_COMPLEX, krow, SLU_MPI_TAG (3, kk0) /* (4*kk0+3)%tag_ub */ , scp->comm, &recv_reqs_u[look_id][1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; #endif } } } /* end for each column in look-ahead window for L panels */ /* stat->time4 += SuperLU_timer_()-tt1; */ /* ================================= * * ==== look-ahead the U rows === * * ================================= */ kk1 = k0; kk2 = SUPERLU_MIN (k0 + num_look_aheads, nsupers - 1); for (kk0 = kk1; kk0 < kk2; kk0++) { kk = perm_c_supno[kk0]; /* order determined from static schedule */ if (factoredU[kk0] != 1 && look_ahead[kk] < k0) { /* does not depend on current column k */ kcol = PCOL (kk, grid); krow = PROW (kk, grid); lk = LBj (kk, grid); /* Local block number across row. NOT USED?? 
-- Sherry */ look_id = kk0 % (1 + num_look_aheads); msgcnt = msgcntsU[look_id]; recv_req = recv_reqs[look_id]; /* ================================================= * * Check if diagonal block has been received * * for panel factorization of U in look-ahead window * * ================================================= */ if (mycol == kcol) { /* I own this column panel, no need to receive L */ flag0 = flag1 = 1; msgcnt[0] = msgcnt[1] = -1; /* No need to transfer Lsub, nor Lval */ } else { /* Check to receive L(:,kk) from the left */ flag0 = flag1 = 0; if ( ToRecv[kk] >= 1 ) { #if ( PROFlevel>=1 ) TIC (t1); #endif if ( recv_req[0] != MPI_REQUEST_NULL ) { MPI_Test (&recv_req[0], &flag0, &status); if ( flag0 ) { MPI_Get_count (&status, mpi_int_t, &msgcnt[0]); recv_req[0] = MPI_REQUEST_NULL; } } else flag0 = 1; if ( recv_req[1] != MPI_REQUEST_NULL ) { MPI_Test (&recv_req[1], &flag1, &status); if ( flag1 ) { MPI_Get_count (&status, mpi_int_t, &msgcnt[1]); recv_req[1] = MPI_REQUEST_NULL; } } else flag1 = 1; #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif } else { msgcnt[0] = 0; } } if (flag0 && flag1) { /* L(:,kk) is ready */ /* tt1 = SuperLU_timer_(); */ scp = &grid->cscp; /* The scope of process column. */ if (myrow == krow) { factoredU[kk0] = 1; /* Parallel triangular solve across process row *krow* -- U(k,j) = L(k,k) \ A(k,j). */ double ttt2 = SuperLU_timer_(); #ifdef _OPENMP /* #pragma omp parallel */ /* Sherry -- parallel done inside pzgstrs2 */ #endif { PZGSTRS2 (kk0, kk, Glu_persist, grid, Llu, stat); } pdgstrs2_timer += SuperLU_timer_()-ttt2; /* stat->time8 += SuperLU_timer_()-ttt2; */ /* Multicasts U(kk,:) to process columns. 
*/ lk = LBi (kk, grid); usub = Ufstnz_br_ptr[lk]; uval = Unzval_br_ptr[lk]; if (usub) { msgcnt[2] = usub[2]; /* metadata size */ msgcnt[3] = usub[1]; /* Uval[] size */ } else { msgcnt[2] = msgcnt[3] = 0; } if (ToSendD[lk] == YES) { for (pi = 0; pi < Pr; ++pi) { if (pi != myrow) { #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Isend (usub, msgcnt[2], mpi_int_t, pi, SLU_MPI_TAG (2, kk0), /* (4*kk0+2)%tag_ub */ scp->comm, &send_reqs_u[look_id][pi]); MPI_Isend (uval, msgcnt[3], SuperLU_MPI_DOUBLE_COMPLEX, pi, SLU_MPI_TAG (3, kk0), /* (4*kk0+3)%tag_ub */ scp->comm, &send_reqs_u[look_id][pi + Pr]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[2] * iword + msgcnt[3] * dword; #endif #if ( DEBUGlevel>=2 ) printf ("[%d] Send U(%4d,:) to Pr %2d\n", iam, k, pi); #endif } /* if pi ... */ } /* for pi ... */ } /* if ToSendD ... */ /* stat->time2 += SuperLU_timer_()-tt1; */ } /* end if myrow == krow */ } /* end if flag0 & flag1 ... */ } /* end if factoredU[] ... */ } /* end for kk0 ... */ /* ============================================== * * == start processing the current row of U(k,:) * * ============================================== */ knsupc = SuperSize (k); krow = PROW (k, grid); kcol = PCOL (k, grid); /* tt1 = SuperLU_timer_(); */ look_id = k0 % (1 + num_look_aheads); recv_req = recv_reqs[look_id]; send_req = send_reqs[look_id]; msgcnt = msgcnts[look_id]; Usub_buf = Llu->Usub_buf_2[look_id]; Uval_buf = Llu->Uval_buf_2[look_id]; if (mycol == kcol) { lk = LBj (k, grid); /* Local block number in L */ #if ( PROFlevel>=1 ) TIC(t1); #endif for (pj = 0; pj < Pc; ++pj) { /* Wait for Isend to complete before using lsub/lusup buffer. 
*/ if (ToSendR[lk][pj] != EMPTY) { MPI_Wait (&send_req[pj], &status); MPI_Wait (&send_req[pj + Pc], &status); } } #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; } else { if (ToRecv[k] >= 1) { /* Recv block column L(:,k). */ scp = &grid->rscp; /* The scope of process row. */ /* ============================================= * * Waiting for L(:,kk) for outer-product uptate * * if iam in U(kk,:), then the diagonal block * * did not reach in time for panel factorization * * of U(k,:). * * ============================================= */ #if ( PROFlevel>=1 ) TIC (t1); #endif if (recv_req[0] != MPI_REQUEST_NULL) { MPI_Wait (&recv_req[0], &status); MPI_Get_count (&status, mpi_int_t, &msgcnt[0]); recv_req[0] = MPI_REQUEST_NULL; } else { msgcnt[0] = msgcntsU[look_id][0]; #if (DEBUGlevel>=2) printf("\t[%d] k=%d, look_id=%d, recv_req[0] == MPI_REQUEST_NULL, msgcnt[0] = %d\n", iam, k, look_id, msgcnt[0]); #endif } if (recv_req[1] != MPI_REQUEST_NULL) { MPI_Wait (&recv_req[1], &status); MPI_Get_count (&status, SuperLU_MPI_DOUBLE_COMPLEX, &msgcnt[1]); recv_req[1] = MPI_REQUEST_NULL; } else { msgcnt[1] = msgcntsU[look_id][1]; #if (DEBUGlevel>=2) printf("\t[%d] k=%d, look_id=%d, recv_req[1] == MPI_REQUEST_NULL, msgcnt[1] = %d\n", iam, k, look_id, msgcnt[1]); #endif } #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif #if ( DEBUGlevel>=2 ) printf("[%d] Recv L(:,%4d): #lsub %4d, #lusup %4d from Pc %2d\n", iam, k, msgcnt[0], msgcnt[1], kcol); fflush (stdout); #endif #if ( PRNTlevel==3 ) ++total_msg; if (!msgcnt[0]) ++zero_msg; #endif } else { msgcnt[0] = 0; } lsub = Lsub_buf_2[look_id]; lusup = Lval_buf_2[look_id]; } /* else if mycol = Pc(k) */ /* stat->time1 += SuperLU_timer_()-tt1; */ scp = &grid->cscp; /* The scope of process column. 
*/ /* tt1 = SuperLU_timer_(); */ if (myrow == krow) { /* I own U(k,:) */ lk = LBi (k, grid); usub = Ufstnz_br_ptr[lk]; uval = Unzval_br_ptr[lk]; if (factoredU[k0] == -1) { /* Parallel triangular solve across process row *krow* -- U(k,j) = L(k,k) \ A(k,j). */ double ttt2 = SuperLU_timer_(); #ifdef _OPENMP /* #pragma omp parallel */ /* Sherry -- parallel done inside pzgstrs2 */ #endif { PZGSTRS2 (k0, k, Glu_persist, grid, Llu, stat); } pdgstrs2_timer += SuperLU_timer_() - ttt2; /* Sherry -- need to set factoredU[k0] = 1; ?? */ /* Multicasts U(k,:) along process columns. */ if ( usub ) { msgcnt[2] = usub[2]; /* metadata size */ msgcnt[3] = usub[1]; /* Uval[] size */ } else { msgcnt[2] = msgcnt[3] = 0; } if (ToSendD[lk] == YES) { for (pi = 0; pi < Pr; ++pi) { if (pi != myrow) { /* Matching recv was pre-posted before */ #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Send (usub, msgcnt[2], mpi_int_t, pi, SLU_MPI_TAG (2, k0), /* (4*k0+2)%tag_ub */ scp->comm); MPI_Send (uval, msgcnt[3], SuperLU_MPI_DOUBLE_COMPLEX, pi, SLU_MPI_TAG (3, k0), /* (4*k0+3)%tag_ub */ scp->comm); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; msg_cnt += 2; msg_vol += msgcnt[2] * iword + msgcnt[3] * dword; #endif #if ( DEBUGlevel>=2 ) printf ("[%d] Send U(%4d,:) down to Pr %2d\n", iam, k, pi); #endif } /* if pi ... */ } /* for pi ... */ } /* if ToSendD ... */ } else { /* Panel U(k,:) already factorized from previous look-ahead */ /* ================================================ * * Wait for downward sending of U(k,:) to complete * * for outer-product update. 
* * ================================================ */ if (ToSendD[lk] == YES) { #if ( PROFlevel>=1 ) TIC (t1); #endif for (pi = 0; pi < Pr; ++pi) { if (pi != myrow) { MPI_Wait (&send_reqs_u[look_id][pi], &status); MPI_Wait (&send_reqs_u[look_id][pi + Pr], &status); } } #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; #endif } msgcnt[2] = msgcntsU[look_id][2]; msgcnt[3] = msgcntsU[look_id][3]; } /* stat->time2 += SuperLU_timer_()-tt1; */ } else { /* myrow != krow */ /* ========================================== * * Wait for U(k,:) for outer-product updates. * * ========================================== */ if (ToRecv[k] == 2) { /* Recv block row U(k,:). */ #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Wait (&recv_reqs_u[look_id][0], &status); MPI_Get_count (&status, mpi_int_t, &msgcnt[2]); MPI_Wait (&recv_reqs_u[look_id][1], &status); MPI_Get_count (&status, SuperLU_MPI_DOUBLE_COMPLEX, &msgcnt[3]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DOWN] += t2; #endif usub = Usub_buf; uval = Uval_buf; #if ( DEBUGlevel>=2 ) printf ("[%d] Recv U(%4d,:) from Pr %2d\n", iam, k, krow); #endif #if ( PRNTlevel==3 ) ++total_msg; if (!msgcnt[2]) ++zero_msg; #endif } else { msgcnt[2] = 0; } /* stat->time6 += SuperLU_timer_()-tt1; */ } /* end if myrow == Pr(k) */ /* * Parallel rank-k update; pair up blocks L(i,k) and U(k,j). * for (j = k+1; k < N; ++k) { * for (i = k+1; i < N; ++i) * if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid ) * && L(i,k) != 0 && U(k,j) != 0 ) * A(i,j) = A(i,j) - L(i,k) * U(k,j); */ msg0 = msgcnt[0]; msg2 = msgcnt[2]; /* tt1 = SuperLU_timer_(); */ if (msg0 && msg2) { /* L(:,k) and U(k,:) are not empty. */ nsupr = lsub[1]; /* LDA of lusup. */ if (myrow == krow) { /* Skip diagonal block L(k,k). 
*/ lptr0 = BC_HEADER + LB_DESCRIPTOR + lsub[BC_HEADER + 1]; luptr0 = knsupc; nlb = lsub[0] - 1; } else { lptr0 = BC_HEADER; luptr0 = 0; nlb = lsub[0]; } iukp = BR_HEADER; /* Skip header; Pointer to index[] of U(k,:) */ rukp = 0; /* Pointer to nzval[] of U(k,:) */ nub = usub[0]; /* Number of blocks in the block row U(k,:) */ klst = FstBlockC (k + 1); /* ------------------------------------------------------------- Update the look-ahead block columns A(:,k+1:k+num_look_ahead) ------------------------------------------------------------- */ iukp0 = iukp; rukp0 = rukp; /* reorder the remaining columns in bottome-up */ /* TAU_STATIC_TIMER_START("LOOK_AHEAD_UPDATE"); */ for (jj = 0; jj < nub; jj++) { #ifdef ISORT iperm_u[jj] = iperm_c_supno[usub[iukp]]; /* Global block number of block U(k,j). */ perm_u[jj] = jj; #else perm_u[2 * jj] = iperm_c_supno[usub[iukp]]; /* Global block number of block U(k,j). */ perm_u[2 * jj + 1] = jj; #endif jb = usub[iukp]; /* Global block number of block U(k,j). */ nsupc = SuperSize (jb); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). 
*/ iukp += nsupc; } iukp = iukp0; #ifdef ISORT isort (nub, iperm_u, perm_u); #else qsort (perm_u, (size_t) nub, 2 * sizeof (int_t), &superlu_sort_perm); #endif j = jj0 = 0; /************************************************************************/ #if 0 for (jj = 0; jj < nub; ++jj) assert(perm_u[jj] == jj); /* Sherry */ #endif double ttx =SuperLU_timer_(); //#include "zlook_ahead_update_v4.c" #include "zlook_ahead_update.c" lookaheadupdatetimer += SuperLU_timer_() - ttx; /************************************************************************/ /*ifdef OMP_LOOK_AHEAD */ /* TAU_STATIC_TIMER_STOP("LOOK_AHEAD_UPDATE"); */ } /* if L(:,k) and U(k,:) not empty */ /* stat->time3 += SuperLU_timer_()-tt1; */ /* ================== */ /* == post receive == */ /* ================== */ kk1 = SUPERLU_MIN (k0 + num_look_aheads, nsupers - 1); for (kk0 = k0 + 1; kk0 <= kk1; kk0++) { kk = perm_c_supno[kk0]; kcol = PCOL (kk, grid); if (look_ahead[kk] == k0) { if (mycol != kcol) { if (ToRecv[kk] >= 1) { scp = &grid->rscp; /* The scope of process row. */ look_id = kk0 % (1 + num_look_aheads); recv_req = recv_reqs[look_id]; #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Irecv (Lsub_buf_2[look_id], Llu->bufmax[0], mpi_int_t, kcol, SLU_MPI_TAG (0, kk0), /* (4*kk0)%tag_ub */ scp->comm, &recv_req[0]); MPI_Irecv (Lval_buf_2[look_id], Llu->bufmax[1], SuperLU_MPI_DOUBLE_COMPLEX, kcol, SLU_MPI_TAG (1, kk0), /* (4*kk0+1)%tag_ub */ scp->comm, &recv_req[1]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; #endif } } else { lk = LBj (kk, grid); /* Local block number. */ lsub1 = Lrowind_bc_ptr[lk]; lusup1 = Lnzval_bc_ptr[lk]; if (factored[kk] == -1) { /* Factor diagonal and subdiagonal blocks and test for exact singularity. 
*/ factored[kk] = 0; /* flag column kk as factored */ double ttt1 = SuperLU_timer_(); PZGSTRF2 (options, kk0, kk, thresh, Glu_persist, grid, Llu, U_diag_blk_send_req, tag_ub, stat, info); pdgstrf2_timer += SuperLU_timer_() - ttt1; /* Process column *kcol+1* multicasts numeric values of L(:,k+1) to process rows. */ look_id = kk0 % (1 + num_look_aheads); send_req = send_reqs[look_id]; msgcnt = msgcnts[look_id]; if (lsub1) { msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0] * LB_DESCRIPTOR; msgcnt[1] = lsub1[1] * SuperSize (kk); } else { msgcnt[0] = 0; msgcnt[1] = 0; } scp = &grid->rscp; /* The scope of process row. */ for (pj = 0; pj < Pc; ++pj) { if (ToSendR[lk][pj] != EMPTY) { #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Isend (lsub1, msgcnt[0], mpi_int_t, pj, SLU_MPI_TAG (0, kk0), /* (4*kk0)%tag_ub */ scp->comm, &send_req[pj]); MPI_Isend (lusup1, msgcnt[1], SuperLU_MPI_DOUBLE_COMPLEX, pj, SLU_MPI_TAG (1, kk0), /* (4*kk0+1)%tag_ub */ scp->comm, &send_req[pj + Pc]); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_RIGHT] += t2; ++prof_sendR[lk]; #endif } } /* end for pj ... */ } /* if factored[kk] ... */ } } } double tsch = SuperLU_timer_(); /*******************************************************************/ #ifdef GPU_ACC #include "zSchCompUdt-cuda.c" #else /*#include "SchCompUdt--Phi-2Ddynamic-alt.c"*/ //#include "zSchCompUdt-2Ddynamic_v6.c" #include "zSchCompUdt-2Ddynamic.c" #endif /*uncomment following to compare against SuperLU 3.3 baseline*/ /* #include "SchCompUdt--baseline.c" */ /************************************************************************/ NetSchurUpTimer += SuperLU_timer_() - tsch; } /* MAIN LOOP for k0 = 0, ... */ /* ################################################################## ** END MAIN LOOP: for k0 = ... 
################################################################## */ pxgstrfTimer = SuperLU_timer_() - pxgstrfTimer; #if ( PRNTlevel>=1 ) /* Print detailed statistics */ /* Updating total flops */ double allflops; MPI_Reduce(&RemainGEMM_flops, &allflops, 1, MPI_DOUBLE, MPI_SUM, 0, grid->comm); if ( iam==0 ) { printf("\nInitialization time\t%8.2lf seconds\n" "\t Serial: compute static schedule, allocate storage\n", InitTimer); printf("\n==== Time breakdown in factorization (rank 0) ====\n"); printf("Panel factorization \t %8.2lf seconds\n", pdgstrf2_timer + pdgstrs2_timer); printf(".. L-panel pxgstrf2 \t %8.2lf seconds\n", pdgstrf2_timer); printf(".. U-panel pxgstrs2 \t %8.2lf seconds\n", pdgstrs2_timer); printf("Time in Look-ahead update \t %8.2lf seconds\n", lookaheadupdatetimer); printf("Time in Schur update \t\t %8.2lf seconds\n", NetSchurUpTimer); printf(".. Time to Gather L buffer\t %8.2lf (Separate L panel by Lookahead/Remain)\n", GatherLTimer); printf(".. Time to Gather U buffer\t %8.2lf \n", GatherUTimer); printf(".. Time in GEMM %8.2lf \n", LookAheadGEMMTimer + RemainGEMMTimer); printf("\t* Look-ahead\t %8.2lf \n", LookAheadGEMMTimer); printf("\t* Remain\t %8.2lf\tFlops %8.2le\tGflops %8.2lf\n", RemainGEMMTimer, allflops, allflops/RemainGEMMTimer*1e-9); printf(".. 
Time to Scatter %8.2lf \n", LookAheadScatterTimer + RemainScatterTimer); printf("\t* Look-ahead\t %8.2lf \n", LookAheadScatterTimer); printf("\t* Remain\t %8.2lf \n", RemainScatterTimer); printf("Total factorization time \t: %8.2lf seconds, \n", pxgstrfTimer); printf("--------\n"); printf("GEMM maximum block: %d-%d-%d\n", gemm_max_m, gemm_max_k, gemm_max_n); } #endif #if ( DEBUGlevel>=3 ) for (i = 0; i < Pr * Pc; ++i) { if (iam == i) { zPrintLblocks(iam, nsupers, grid, Glu_persist, Llu); zPrintUblocks(iam, nsupers, grid, Glu_persist, Llu); printf ("(%d)\n", iam); PrintInt10 ("Recv", nsupers, Llu->ToRecv); } MPI_Barrier (grid->comm); } #endif /******************************************************** * Free memory * ********************************************************/ if (Pr * Pc > 1) { SUPERLU_FREE (Lsub_buf_2[0]); /* also free Lsub_buf_2[1] */ SUPERLU_FREE (Lval_buf_2[0]); /* also free Lval_buf_2[1] */ if (Llu->bufmax[2] != 0) SUPERLU_FREE (Usub_buf_2[0]); if (Llu->bufmax[3] != 0) SUPERLU_FREE (Uval_buf_2[0]); if (U_diag_blk_send_req[myrow] != MPI_REQUEST_NULL) { /* wait for last Isend requests to complete, deallocate objects */ for (krow = 0; krow < Pr; ++krow) { if (krow != myrow) MPI_Wait (U_diag_blk_send_req + krow, &status); } } SUPERLU_FREE (U_diag_blk_send_req); } log_memory( -((Llu->bufmax[0] + Llu->bufmax[2]) * (num_look_aheads + 1) * iword + (Llu->bufmax[1] + Llu->bufmax[3]) * (num_look_aheads + 1) * dword), stat ); SUPERLU_FREE (Lsub_buf_2); SUPERLU_FREE (Lval_buf_2); SUPERLU_FREE (Usub_buf_2); SUPERLU_FREE (Uval_buf_2); SUPERLU_FREE (perm_c_supno); SUPERLU_FREE (perm_u); #ifdef ISORT SUPERLU_FREE (iperm_u); #endif SUPERLU_FREE (look_ahead); SUPERLU_FREE (factoredU); SUPERLU_FREE (factored); log_memory(-(6 * nsupers * iword), stat); for (i = 0; i <= num_look_aheads; i++) { SUPERLU_FREE (msgcnts[i]); SUPERLU_FREE (msgcntsU[i]); } SUPERLU_FREE (msgcnts); SUPERLU_FREE (msgcntsU); for (i = 0; i <= num_look_aheads; i++) { SUPERLU_FREE (send_reqs_u[i]); 
SUPERLU_FREE (recv_reqs_u[i]); SUPERLU_FREE (send_reqs[i]); SUPERLU_FREE (recv_reqs[i]); } SUPERLU_FREE (recv_reqs_u); SUPERLU_FREE (send_reqs_u); SUPERLU_FREE (recv_reqs); SUPERLU_FREE (send_reqs); #ifdef GPU_ACC checkCuda (cudaFreeHost (bigV)); checkCuda (cudaFreeHost (bigU)); cudaFree( (void*)dA ); /* Sherry added */ cudaFree( (void*)dB ); cudaFree( (void*)dC ); SUPERLU_FREE( handle ); SUPERLU_FREE( streams ); SUPERLU_FREE( stream_end_col ); #else #ifdef __INTEL_COMPILER _mm_free (bigU); _mm_free (bigV); #else SUPERLU_FREE (bigV); SUPERLU_FREE (bigU); #endif /* Decrement freed memory from memory stat. */ log_memory(-(bigv_size + bigu_size) * dword, stat); #endif SUPERLU_FREE (Llu->ujrow); // SUPERLU_FREE (tempv2d);/* Sherry */ SUPERLU_FREE (indirect); SUPERLU_FREE (indirect2); /* Sherry added */ SUPERLU_FREE (iuip); SUPERLU_FREE (ruip); ldt = sp_ienv_dist(3); log_memory( -(3 * ldt *ldt * dword + 2 * ldt * num_threads * iword + 2 * k * iword), stat ); /* Sherry added */ SUPERLU_FREE(omp_loop_time); SUPERLU_FREE(full_u_cols); SUPERLU_FREE(blk_ldu); #if ( PRNTlevel>=1 ) log_memory(-2 * ncb * dword, stat); #endif SUPERLU_FREE(lookAheadFullRow); SUPERLU_FREE(lookAheadStRow); SUPERLU_FREE(lookAhead_lptr); SUPERLU_FREE(lookAhead_ib); SUPERLU_FREE(RemainFullRow); SUPERLU_FREE(RemainStRow); SUPERLU_FREE(Remain_lptr); SUPERLU_FREE(Remain_ib); SUPERLU_FREE(Remain_info); SUPERLU_FREE(lookAhead_L_buff); SUPERLU_FREE(Remain_L_buff); log_memory( -(4 * mrb * iword + mrb * sizeof(Remain_info_t) + ldt * ldt * (num_look_aheads + 1) * dword + Llu->bufmax[1] * dword), stat ); SUPERLU_FREE(Ublock_info); SUPERLU_FREE(Ublock_info_iukp); SUPERLU_FREE(Ublock_info_rukp); SUPERLU_FREE(Ublock_info_jb); #if ( PROFlevel>=1 ) TIC (t1); #endif /* Prepare error message - find the smallesr index i that U(i,i)==0 */ if ( *info == 0 ) *info = n + 1; MPI_Allreduce (info, &iinfo, 1, MPI_INT, MPI_MIN, grid->comm); if ( iinfo == n + 1 ) *info = 0; else *info = iinfo; #if ( PROFlevel>=1 ) TOC (t2, t1); 
stat->utime[COMM] += t2; { float msg_vol_max, msg_vol_sum, msg_cnt_max, msg_cnt_sum; MPI_Reduce (&msg_cnt, &msg_cnt_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm); MPI_Reduce (&msg_cnt, &msg_cnt_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm); MPI_Reduce (&msg_vol, &msg_vol_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm); MPI_Reduce (&msg_vol, &msg_vol_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm); if ( iam==0 ) { printf ("\tPZGSTRF comm stat:" "\tAvg\tMax\t\tAvg\tMax\n" "\t\t\tCount:\t%.0f\t%.0f\tVol(MB)\t%.2f\t%.2f\n", msg_cnt_sum / Pr / Pc, msg_cnt_max, msg_vol_sum / Pr / Pc * 1e-6, msg_vol_max * 1e-6); printf("\t\tcomm time on task 0: %8.2lf\n" "\t\t\tcomm down DIAG block %8.2lf\n" "\t\t\tcomm right L panel %8.2lf\n" "\t\t\tcomm down U panel %8.2lf\n", stat->utime[COMM], stat->utime[COMM_DIAG], stat->utime[COMM_RIGHT], stat->utime[COMM_DOWN]); //#include //int Digs = DECIMAL_DIG; printf("gemm_count %d\n", gemm_count); for (i = 0; i < gemm_count; ++i) fprintf(fgemm, "%8d%8d%8d\t %20.16e\t%8d\n", gemm_stats[i].m, gemm_stats[i].n, gemm_stats[i].k, gemm_stats[i].microseconds, prof_sendR[i]); fclose(fgemm); } SUPERLU_FREE(gemm_stats); SUPERLU_FREE(prof_sendR); } #endif #if ( PRNTlevel==3 ) MPI_Allreduce (&zero_msg, &iinfo, 1, MPI_INT, MPI_SUM, grid->comm); if (!iam) printf (".. # msg of zero size\t%d\n", iinfo); MPI_Allreduce (&total_msg, &iinfo, 1, MPI_INT, MPI_SUM, grid->comm); if (!iam) printf (".. 
# total msg\t%d\n", iinfo); #endif #if ( DEBUGlevel>=3 ) for (i = 0; i < Pr * Pc; ++i) { if (iam == i) { zPrintLblocks (iam, nsupers, grid, Glu_persist, Llu); zPrintUblocks (iam, nsupers, grid, Glu_persist, Llu); printf ("(%d)\n", iam); PrintInt10 ("Recv", nsupers, Llu->ToRecv); } MPI_Barrier (grid->comm); } #endif #if ( DEBUGlevel>=3 ) printf ("(%d) num_copy=%d, num_update=%d\n", iam, num_copy, num_update); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC (iam, "Exit pzgstrf()"); #endif return 0; } /* PZGSTRF */ SuperLU_DIST_5.3.0/SRC/pzgstrs.c0000644013363400111340000012375513233431301015160 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of distributed linear equations A*X = B with a * general N-by-N matrix A using the LU factors computed previously. * *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 * 
*/ #include "superlu_zdefs.h" /* * Sketch of the algorithm for L-solve: * ======================= * * Self-scheduling loop: * * while ( not finished ) { .. use message counter to control * * reveive a message; * * if ( message is Xk ) { * perform local block modifications into lsum[]; * lsum[i] -= L_i,k * X[k] * if all local updates done, Isend lsum[] to diagonal process; * * } else if ( message is LSUM ) { .. this must be a diagonal process * accumulate LSUM; * if ( all LSUM are received ) { * perform triangular solve for Xi; * Isend Xi down to the current process column; * perform local block modifications into lsum[]; * } * } * } * * * Auxiliary data structures: lsum[] / ilsum (pointer to lsum array) * ======================= * * lsum[] array (local) * + lsum has "nrhs" columns, row-wise is partitioned by supernodes * + stored by row blocks, column wise storage within a row block * + prepend a header recording the global block number. * * lsum[] ilsum[nsupers + 1] * * ----- * | | | <- header of size 2 --- * --------- <--------------------| | * | | | | | --- * | | | | | |-----------| | * | | | | | | --- * --------- | |-------| | * | | | <- header | | --- * --------- <--------| | |----| | * | | | | | | | --- * | | | | | | | * | | | | | | | * --------- | | * | | | <- header | | * --------- <------------| | * | | | | | | * | | | | | | * | | | | | | * --------- <---------------| */ /*#define ISEND_IRECV*/ /* * Function prototypes */ #ifdef _CRAY fortran void CTRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif /*! \brief * *
 * Purpose
 * =======
 *   Re-distribute B on the diagonal processes of the 2D process mesh.
 * 
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 *
 * Arguments
 * =========
 * 
 * B      (input) doublecomplex*
 *        The distributed right-hand side matrix of the possibly
 *        equilibrated system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * ldb    (input) int (local)
 *        Leading dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ilsum  (input) int* (global)
 *        Starting position of each supernode in a full array.
 *
 * x      (output) doublecomplex*
 *        The solution vector. It is valid only on the diagonal processes.
 *
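 * ScalePermstruct (input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * Glu_persist (input) Glu_persist_t*
 *        Supplies the supernode arrays xsup and supno read by this routine.
 *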
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * SOLVEstruct (input) SOLVEstruct_t*
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * Return value
 * ============
 *   Always 0.
 * 
*/
int_t
pzReDistribute_B_to_X(doublecomplex *B, int_t m_loc, int nrhs, int_t ldb,
                      int_t fst_row, int_t *ilsum, doublecomplex *x,
                      ScalePermstruct_t *ScalePermstruct,
                      Glu_persist_t *Glu_persist,
                      gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct)
{
    /*
     * Overview: every process packs its local rows of B together with the
     * permuted global row index, exchanges both with MPI_Alltoallv, and the
     * receivers scatter the rows into the block-structured vector x.
     *
     * NOTE(review): xsup, supno, ilsum, nrhs, grid and j are not all named
     * explicitly below; they are presumably picked up by the project macros
     * BlockNum / SuperSize / FstBlockC / X_BLK / RHS_ITERATE, so their
     * names must stay as they are.
     */
    pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;
    int_t *perm_r, *perm_c;     /* row and column permutation vectors */
    int_t *xsup, *supno;
    int   *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs;
    int   *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs;
    int   *ptr_to_ibuf, *ptr_to_dbuf;
    int   *counts;              /* base of the eight count/displacement arrays */
    int_t *send_ibuf, *recv_ibuf;
    doublecomplex *send_dbuf, *recv_dbuf;
    int_t  nsend, nrecv;        /* total rows sent / received by this process */
    int_t  loc, next_row, next_val, cnt, j;
    int    p, procs;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pzReDistribute_B_to_X()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    perm_r = ScalePermstruct->perm_r;
    perm_c = ScalePermstruct->perm_c;
    procs  = grid->nprow * grid->npcol;
    xsup   = Glu_persist->xsup;
    supno  = Glu_persist->supno;

    /* B_to_X_SendCnt is addressed as eight consecutive sections of
       length procs: counts first, then displacements. */
    counts       = gstrs_comm->B_to_X_SendCnt;
    SendCnt      = counts;
    SendCnt_nrhs = counts +   procs;
    RecvCnt      = counts + 2*procs;
    RecvCnt_nrhs = counts + 3*procs;
    sdispls      = counts + 4*procs;
    sdispls_nrhs = counts + 5*procs;
    rdispls      = counts + 6*procs;
    rdispls_nrhs = counts + 7*procs;
    ptr_to_ibuf  = gstrs_comm->ptr_to_ibuf;
    ptr_to_dbuf  = gstrs_comm->ptr_to_dbuf;

    /* ------------------------------------------------------------
       NOW COMMUNICATE THE ACTUAL DATA.
       ------------------------------------------------------------*/
    nsend = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */
    nrecv = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */

    /* One allocation per kind; the receive area follows the send area. */
    send_ibuf = intMalloc_dist(nsend + nrecv);
    if ( !send_ibuf )
        ABORT("Malloc fails for send_ibuf[].");
    recv_ibuf = send_ibuf + nsend;

    send_dbuf = doublecomplexMalloc_dist((nsend + nrecv)* (size_t)nrhs);
    if ( !send_dbuf )
        ABORT("Malloc fails for send_dbuf[].");
    recv_dbuf = send_dbuf + nsend * nrhs;

    /* Reset the per-destination write cursors to the section starts. */
    for (p = 0; p < procs; ++p) {
        ptr_to_ibuf[p] = sdispls[p];
        ptr_to_dbuf[p] = sdispls[p] * nrhs;
    }

    /* Copy the row indices and values to the send buffer. */
    for (loc = 0; loc < m_loc; ++loc) {
        int_t irow = perm_c[perm_r[fst_row + loc]]; /* Row number in Pc*Pr*B */
        int_t gbi  = BlockNum( irow );
        int_t pos;

        p = PNUM( PROW(gbi,grid), PCOL(gbi,grid), grid ); /* Diagonal process */

        pos = ptr_to_ibuf[p];
        send_ibuf[pos] = irow;

        pos = ptr_to_dbuf[p];
        RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */
            send_dbuf[pos++] = B[loc + j*ldb];
        }

        ++ptr_to_ibuf[p];
        ptr_to_dbuf[p] += nrhs;
    }

    /* Communicate the (permuted) row indices. */
    MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t,
                  recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm);

    /* Communicate the numerical values. */
    MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs,
                  SuperLU_MPI_DOUBLE_COMPLEX,
                  recv_dbuf, RecvCnt_nrhs, rdispls_nrhs,
                  SuperLU_MPI_DOUBLE_COMPLEX, grid->comm);

    /* ------------------------------------------------------------
       Copy buffer into X on the diagonal processes.
       ------------------------------------------------------------*/
    next_row = 0;   /* running position in recv_ibuf, across all senders */
    for (p = 0; p < procs; ++p) {
        next_val = rdispls_nrhs[p];
        for (cnt = 0; cnt < RecvCnt[p]; ++cnt) {
            /* Only the diagonal processes do this; the off-diagonal
               processes have 0 RecvCnt. */
            int_t irow   = recv_ibuf[next_row]; /* The permuted row index. */
            int_t blk    = BlockNum( irow );
            int_t knsupc = SuperSize( blk );
            int_t lk     = LBi( blk, grid );    /* Local block number. */
            int_t xoff   = X_BLK( lk );
            int_t rel;

            x[xoff - XK_H].r = blk; /* Block number prepended in the header. */
            x[xoff - XK_H].i = 0;

            rel = irow - FstBlockC(blk); /* Relative row number in X-block */
            RHS_ITERATE(j) {
                x[xoff + rel + j*knsupc] = recv_dbuf[next_val++];
            }
            ++next_row;
        }
    }

    SUPERLU_FREE(send_ibuf);
    SUPERLU_FREE(send_dbuf);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Exit pzReDistribute_B_to_X()");
#endif
    return 0;
} /* pzReDistribute_B_to_X */

/*! \brief
 *
 *
 * Purpose
 * =======
 *   Re-distribute X on the diagonal processes to B distributed on all
 *   the processes.
 *
 * Note
 * ====
 *   This routine can only be called after the routine pxgstrs_init(),
 *   in which the structures of the send and receive buffers are set up.
 * 
*/ int_t pzReDistribute_X_to_B(int_t n, doublecomplex *B, int_t m_loc, int_t ldb, int_t fst_row, int_t nrhs, doublecomplex *x, int_t *ilsum, ScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct) { int_t i, ii, irow, j, jj, k, knsupc, nsupers, l, lk; int_t *xsup, *supno; int *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs; int *sdispls, *rdispls, *sdispls_nrhs, *rdispls_nrhs; int *ptr_to_ibuf, *ptr_to_dbuf; int_t *send_ibuf, *recv_ibuf; doublecomplex *send_dbuf, *recv_dbuf; int_t *row_to_proc = SOLVEstruct->row_to_proc; /* row-process mapping */ pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm; int iam, p, q, pkk, procs; int_t num_diag_procs, *diag_procs; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzReDistribute_X_to_B()"); #endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = Glu_persist->supno[n-1] + 1; iam = grid->iam; procs = grid->nprow * grid->npcol; SendCnt = gstrs_comm->X_to_B_SendCnt; SendCnt_nrhs = gstrs_comm->X_to_B_SendCnt + procs; RecvCnt = gstrs_comm->X_to_B_SendCnt + 2*procs; RecvCnt_nrhs = gstrs_comm->X_to_B_SendCnt + 3*procs; sdispls = gstrs_comm->X_to_B_SendCnt + 4*procs; sdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 5*procs; rdispls = gstrs_comm->X_to_B_SendCnt + 6*procs; rdispls_nrhs = gstrs_comm->X_to_B_SendCnt + 7*procs; ptr_to_ibuf = gstrs_comm->ptr_to_ibuf; ptr_to_dbuf = gstrs_comm->ptr_to_dbuf; k = sdispls[procs-1] + SendCnt[procs-1]; /* Total number of sends */ l = rdispls[procs-1] + RecvCnt[procs-1]; /* Total number of receives */ if ( !(send_ibuf = intMalloc_dist(k + l)) ) ABORT("Malloc fails for send_ibuf[]."); recv_ibuf = send_ibuf + k; if ( !(send_dbuf = doublecomplexMalloc_dist((k + l)*nrhs)) ) ABORT("Malloc fails for send_dbuf[]."); recv_dbuf = send_dbuf + k * nrhs; for (p = 0; p < procs; ++p) { 
ptr_to_ibuf[p] = sdispls[p]; ptr_to_dbuf[p] = sdispls_nrhs[p]; } num_diag_procs = SOLVEstruct->num_diag_procs; diag_procs = SOLVEstruct->diag_procs; for (p = 0; p < num_diag_procs; ++p) { /* For all diagonal processes. */ pkk = diag_procs[p]; if ( iam == pkk ) { for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number */ irow = FstBlockC( k ); l = X_BLK( lk ); for (i = 0; i < knsupc; ++i) { #if 0 ii = inv_perm_c[irow]; /* Apply X <== Pc'*Y */ #else ii = irow; #endif q = row_to_proc[ii]; jj = ptr_to_ibuf[q]; send_ibuf[jj] = ii; jj = ptr_to_dbuf[q]; RHS_ITERATE(j) { /* RHS stored in row major in buffer. */ send_dbuf[jj++] = x[l + i + j*knsupc]; } ++ptr_to_ibuf[q]; ptr_to_dbuf[q] += nrhs; ++irow; } } } } /* ------------------------------------------------------------ COMMUNICATE THE (PERMUTED) ROW INDICES AND NUMERICAL VALUES. ------------------------------------------------------------*/ MPI_Alltoallv(send_ibuf, SendCnt, sdispls, mpi_int_t, recv_ibuf, RecvCnt, rdispls, mpi_int_t, grid->comm); MPI_Alltoallv(send_dbuf, SendCnt_nrhs, sdispls_nrhs, SuperLU_MPI_DOUBLE_COMPLEX, recv_dbuf, RecvCnt_nrhs, rdispls_nrhs, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); /* ------------------------------------------------------------ COPY THE BUFFER INTO B. ------------------------------------------------------------*/ for (i = 0, k = 0; i < m_loc; ++i) { irow = recv_ibuf[i]; irow -= fst_row; /* Relative row number */ RHS_ITERATE(j) { /* RHS is stored in row major in the buffer. */ B[irow + j*ldb] = recv_dbuf[k++]; } } SUPERLU_FREE(send_ibuf); SUPERLU_FREE(send_dbuf); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Exit pzReDistribute_X_to_B()"); #endif return 0; } /* pzReDistribute_X_to_B */ /*! \brief * *
 * Purpose
 * =======
 *
 * PZGSTRS solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by PZGSTRF.
 * If the equilibration, and row and column permutations were performed,
 * the LU factorization was performed for A1 where
 *     A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 * and the linear system solved is
 *     A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and
 * the permutation to B1 by Pc*Pr is applied internally in this routine.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from PZGSTRF for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_zdefs.h for the definition of 'LUstruct_t'.
 *        A may be scaled and permuted into A1, so that
 *        A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) doublecomplex*
 *        On entry, the distributed right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *        On exit, the distributed solution matrix Y of the possibly
 *        equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X,
 *        and X is the solution of the original system.
 *
 * m_loc  (input) int_t (local)
 *        The local row dimension of matrix B.
 *
 * fst_row (input) int_t (global)
 *        The row number of B's first row in the global matrix.
 *
 * ldb    (input) int_t (local)
 *        The leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 * 
 * SOLVEstruct (input) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pzgstrs(int_t n, LUstruct_t *LUstruct, ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid, doublecomplex *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, SOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; doublecomplex alpha = {1.0, 0.0}; doublecomplex zero = {0.0, 0.0}; doublecomplex *lsum; /* Local running sum of the updates to B-components */ doublecomplex *x; /* X component at step k. */ /* NOTE: x and lsum are of same size. */ doublecomplex *lusup, *dest; doublecomplex *recvbuf, *tempv; doublecomplex *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */ Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */ int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */ int_t kcol, krow, mycol, myrow; int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *supno, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int Pc, Pr, iam; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; doublecomplex **Lnzval_bc_ptr; MPI_Status status; MPI_Request *send_req, recv_req; pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm; /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve -- Count the number of local block products to be summed into lsum[lk]. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of lsum[lk] contributions to be received from processes in this row. It is only valid on the diagonal processes. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. 
*/ int_t nleaf = 0, nroot = 0; /*-- Counts used for U-solve --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist = Llu->bsendx_plist; int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */ int_t *brecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. */ double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */ t = SuperLU_timer_(); /* Test input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -9; if ( *info ) { pxerr_dist("PZGSTRS", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = supno[n-1] + 1; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgstrs()"); #endif stat->ops[SOLVE] = 0.0; Llu->SolveMsgSent = 0; /* Save the count to be altered so it can be used by subsequent call to PDGSTRS. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb; if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Obtain ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. 
*/ knsupc = sp_ienv_dist(3); maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H ); if ( !(lsum = doublecomplexCalloc_dist(((size_t)ldalsum)*nrhs + nlb*LSUM_H)) ) ABORT("Calloc fails for lsum[]."); if ( !(x = doublecomplexCalloc_dist(ldalsum * nrhs + nlb * XK_H)) ) ABORT("Calloc fails for x[]."); if ( !(recvbuf = doublecomplexMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doublecomplexCalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. *---------------------------------------------------*/ /* Redistribute B into X on the diagonal processes. */ pzReDistribute_B_to_X(B, m_loc, nrhs, ldb, fst_row, ilsum, x, ScalePermstruct, Glu_persist, grid, SOLVEstruct); /* Set up the headers in lsum[]. */ ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H].r = k;/* Block number prepended in the header.*/ lsum[il - LSUM_H].i = 0; } ii += knsupc; } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol != kcol && fmod[lk] ) mod_bit[lk] = 1; /* contribution from off-diagonal */ } } /*PrintInt10("mod_bit", nlb, mod_bit);*/ #if ( PROFlevel>=2 ) t_reduce_tmp = SuperLU_timer_(); #endif /* Every process receives the count, but it is only useful on the diagonal processes. 
*/ MPI_Allreduce( mod_bit, frecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); #if ( PROFlevel>=2 ) t_reduce += SuperLU_timer_() - t_reduce_tmp; #endif for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* diagonal process */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && fmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) frecv[%4d] %2d\n", iam, k, frecv[lk]); assert( frecv[lk] < Pc ); #endif } } } #endif } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. --------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( frecv[lk]==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc - 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #if 0 MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } } /* if diagonal process ... */ } /* for k ... */ /* ----------------------------------------------------------- Compute the internal nodes asynchronously by all processes. ----------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. 
*/ MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = (*recvbuf).r; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ zlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: /* Receiver must be a diagonal process */ --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) z_add(&x[i + ii + j*knsupc], &x[i + ii + j*knsupc], &tempv[i + j*knsupc]); } if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc - 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); MPI_Isend( &x[ii-XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #if 0 MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications. */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( DEBUGlevel==2 ) { printf("(%d) .. After L-solve: y =\n", iam); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); fflush(stdout); } MPI_Barrier( grid->comm ); } } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); /*for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]);*/ for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Wait(&send_req[i], &status); Llu->SolveMsgSent = 0; MPI_Barrier( grid->comm ); /*--------------------------------------------------- * Back solve Ux = y. * * The Y components from the forward solve is already * on the diagonal processes. 
*---------------------------------------------------*/ /* Save the count to be altered so it can be used by subsequent call to PZGSTRS. */ if ( !(bmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for bmod[]."); for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i]; if ( !(brecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for brecv[]."); Llu->brecv = brecv; /* * Compute brecv[] and nbrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); /* root process in this row scope */ if ( mycol != kcol && bmod[lk] ) mod_bit[lk] = 1; /* Contribution from off-diagonal */ } } /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, brecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); /* root process in this row scope. */ if ( mycol == kcol ) { /* diagonal process */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #endif } /* Re-initialize lsum to zero. 
Each block header is already in place. */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { knsupc = SuperSize( k ); lk = LBi( k, grid ); il = LSUM_BLK( lk ); dest = &lsum[il]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = zero; } } } /* Set up additional pointers for the index and value arrays of U. nub is the number of local block columns. */ nub = CEILING( nsupers, Pc ); /* Number of local block columns. */ if ( !(Urbs = (int_t *) intCalloc_dist(2*nub)) ) ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero blocks in a block column. */ Urbs1 = Urbs + nub; if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) ) ABORT("Malloc fails for Ucb_indptr[]"); if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) ) ABORT("Malloc fails for Ucb_valptr[]"); /* Count number of row blocks in a block column. One pass of the skeleton graph of U. */ for (lk = 0; lk < nlb; ++lk) { usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ /* usub[0] -- number of column blocks in this block row. */ #if ( DEBUGlevel>=2 ) Ublocks += usub[0]; #endif i = BR_HEADER; /* Pointer in index array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number */ ++Urbs[LBj(k,grid)]; i += UB_DESCRIPTOR + SuperSize( k ); } } } /* Set up the vertical linked lists for the row blocks. One pass of the skeleton graph of U. */ for (lb = 0; lb < nub; ++lb) { if ( Urbs[lb] ) { /* Not an empty block column. */ if ( !(Ucb_indptr[lb] = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) ) ABORT("Malloc fails for Ucb_indptr[lb][]"); if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) ) ABORT("Malloc fails for Ucb_valptr[lb][]"); } } for (lk = 0; lk < nlb; ++lk) { /* For each block row. */ usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ i = BR_HEADER; /* Pointer in index array. */ j = 0; /* Pointer in nzval array. 
*/ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number, column-wise. */ ljb = LBj( k, grid ); /* Local block number, column-wise. */ Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk; Ucb_indptr[ljb][Urbs1[ljb]].indpos = i; Ucb_valptr[ljb][Urbs1[ljb]] = j; ++Urbs1[ljb]; j += usub[i+1]; i += UB_DESCRIPTOR + SuperSize( k ); } } } #if ( DEBUGlevel>=2 ) for (p = 0; p < Pr*Pc; ++p) { if (iam == p) { printf("(%2d) .. Ublocks %d\n", iam, Ublocks); for (lb = 0; lb < nub; ++lb) { printf("(%2d) Local col %2d: # row blocks %2d\n", iam, lb, Urbs[lb]); if ( Urbs[lb] ) { for (i = 0; i < Urbs[lb]; ++i) printf("(%2d) .. row blk %2d:\ lbnum %d, indpos %d, valpos %d\n", iam, i, Ucb_indptr[lb][i].lbnum, Ucb_indptr[lb][i].indpos, Ucb_valptr[lb][i]); } } } MPI_Barrier( grid->comm ); } for (p = 0; p < Pr*Pc; ++p) { if ( iam == p ) { printf("\n(%d) bsendx_plist[][]", iam); for (lb = 0; lb < nub; ++lb) { printf("\n(%d) .. local col %2d: ", iam, lb); for (i = 0; i < Pr; ++i) printf("%4d", bsendx_plist[lb][i]); } printf("\n"); } MPI_Barrier( grid->comm ); } #endif /* DEBUGlevel */ #if ( PRNTlevel>=3 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Setup U-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif /* * Solve the roots first by all the diagonal processes. */ #if ( DEBUGlevel>=2 ) printf("(%2d) nroot %4d\n", iam, nroot); #endif for (k = nsupers-1; k >= 0 && nroot; --k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process. */ knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number, row-wise. */ if ( brecv[lk]==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. 
*/ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc + 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ --nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #if 0 MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } } /* * Perform local block modifications: lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) zlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if root ... */ } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. */ while ( nbrecvx || nbrecvmod ) { /* While not finished. */ /* Receive a message. */ MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = (*recvbuf).r; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nbrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ zlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); break; case LSUM: /* Receiver must be a diagonal process */ --nbrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) z_add(&x[i + ii + j*knsupc], &x[i + ii + j*knsupc], &tempv[i + j*knsupc]); } if ( (--brecv[lk])==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc + 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #if 0 MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii - XK_H], pi); #endif } } /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) zlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if becomes solvable */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=3 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. U-solve time\t%8.2f\n", t); #endif #if ( DEBUGlevel>=2 ) { doublecomplex *x_col; int diag; printf("\n(%d) .. After U-solve: x (ON DIAG PROCS) = \n", iam); ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); kcol = PCOL( k, grid ); diag = PNUM( krow, kcol, grid); if ( iam == diag ) { /* Diagonal process. */ lk = LBi( k, grid ); jj = X_BLK( lk ); x_col = &x[jj]; RHS_ITERATE(j) { for (i = 0; i < knsupc; ++i) { /* X stored in blocks */ printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+i, x_col[i]); } x_col += knsupc; } } ii += knsupc; } /* for k ... */ } #endif pzReDistribute_X_to_B(n, B, m_loc, ldb, fst_row, nrhs, x, ilsum, ScalePermstruct, Glu_persist, grid, SOLVEstruct); /* Deallocate storage. 
*/ SUPERLU_FREE(lsum); SUPERLU_FREE(x); SUPERLU_FREE(recvbuf); for (i = 0; i < nub; ++i) { if ( Urbs[i] ) { SUPERLU_FREE(Ucb_indptr[i]); SUPERLU_FREE(Ucb_valptr[i]); } } SUPERLU_FREE(Ucb_indptr); SUPERLU_FREE(Ucb_valptr); SUPERLU_FREE(Urbs); SUPERLU_FREE(bmod); SUPERLU_FREE(brecv); /*for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]);*/ for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Wait(&send_req[i], &status); SUPERLU_FREE(send_req); MPI_Barrier( grid->comm ); stat->utime[SOLVE] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgstrs()"); #endif return; } /* PZGSTRS */ SuperLU_DIST_5.3.0/SRC/superlu_enum_consts.h0000644013363400111340000000673413233431301017562 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /** @file superlu_enum_consts.h * \brief enum constants header file * * -- SuperLU routine (version 4.1) -- * Lawrence Berkeley National Lab, Univ. 
of California Berkeley, * October 1, 2010 * January 28, 2018 * */ #ifndef __SUPERLU_ENUM_CONSTS /* allow multiple inclusions */ #define __SUPERLU_ENUM_CONSTS /*********************************************************************** * Enumerate types ***********************************************************************/ typedef enum {NO, YES} yes_no_t; typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t; typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t; typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, METIS_AT_PLUS_A, PARMETIS, ZOLTAN, MY_PERMC} colperm_t; typedef enum {NOTRANS, TRANS, CONJ} trans_t; typedef enum {NOEQUIL, ROW, COL, BOTH} DiagScale_t; typedef enum {NOREFINE, SLU_SINGLE=1, SLU_DOUBLE, SLU_EXTRA} IterRefine_t; //typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL, NO_MEMTYPE} MemType; typedef enum {USUB, LSUB, UCOL, LUSUP, LLVL, ULVL, NO_MEMTYPE} MemType; typedef enum {HEAD, TAIL} stack_end_t; typedef enum {SYSTEM, USER} LU_space_t; typedef enum {ONE_NORM, TWO_NORM, INF_NORM} norm_t; typedef enum {SILU, SMILU_1, SMILU_2, SMILU_3} milu_t; #if 0 typedef enum {NODROP = 0x0000, DROP_BASIC = 0x0001, /* ILU(tau) */ DROP_PROWS = 0x0002, /* ILUTP: keep p maximum rows */ DROP_COLUMN = 0x0004, /* ILUTP: for j-th column, p = gamma * nnz(A(:,j)) */ DROP_AREA = 0x0008, /* ILUTP: for j-th column, use nnz(F(:,1:j)) / nnz(A(:,1:j)) to limit memory growth */ DROP_SECONDARY = 0x000E, /* PROWS | COLUMN | AREA */ DROP_DYNAMIC = 0x0010, DROP_INTERP = 0x0100} rule_t; #endif /* * The following enumerate type is used by the statistics variable * to keep track of flop count and time spent at various stages. * * Note that not all of the fields are disjoint. */ typedef enum { COLPERM, /* find a column ordering that minimizes fills */ ROWPERM, /* find a row ordering maximizes diagonal. */ RELAX, /* find artificial supernodes */ ETREE, /* compute column etree */ EQUIL, /* equilibrate the original matrix */ SYMBFAC, /* symbolic factorization. 
*/ DIST, /* distribute matrix. */ FACT, /* perform LU factorization */ COMM, /* communication for factorization */ COMM_DIAG, /* Bcast diagonal block to process column */ COMM_RIGHT, /* communicate L panel */ COMM_DOWN, /* communicate U panel */ SOL_COMM,/* communication for solve */ RCOND, /* estimate reciprocal condition number */ SOLVE, /* forward and back solves */ REFINE, /* perform iterative refinement */ TRSV, /* fraction of FACT spent in xTRSV */ GEMV, /* fraction of FACT spent in xGEMV */ FERR, /* estimate error bounds after iterative refinement */ NPHASES /* total number of phases */ } PhaseType; #endif /* __SUPERLU_ENUM_CONSTS */ SuperLU_DIST_5.3.0/SRC/dcomplex.h0000644013363400111340000000444413233431301015255 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Header for dcomplex.c * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ /* * This header file is to be included in source files z*.c */ #ifndef __SUPERLU_DCOMPLEX /* allow multiple inclusions */ #define __SUPERLU_DCOMPLEX #include #include "superlu_defs.h" typedef struct { double r, i; } doublecomplex; /* * These variables will be defined to be MPI datatypes for complex * and double complex. I'm too lazy to declare them external in every * file that needs them. * * Use WINOWS_EXPORT_ALL_SYMBOLS on windows to export all symbols when * building a shared library. * Introduce macro SUPERLU_DIST_EXPORT to correctly export the only * remaining data symbol SuperLU_MPI_DOUBLE_COMPLEX. */ extern SUPERLU_DIST_EXPORT MPI_Datatype SuperLU_MPI_DOUBLE_COMPLEX; /* Macro definitions */ /*! \brief Complex Addition c = a + b */ #define z_add(c, a, b) { (c)->r = (a)->r + (b)->r; \ (c)->i = (a)->i + (b)->i; } /*! \brief Complex Subtraction c = a - b */ #define z_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \ (c)->i = (a)->i - (b)->i; } /*! \brief Complex-Double Multiplication */ #define zd_mult(c, a, b) { (c)->r = (a)->r * (b); \ (c)->i = (a)->i * (b); } /*! \brief Complex-Complex Multiplication */ #define zz_mult(c, a, b) { \ double cr, ci; \ cr = (a)->r * (b)->r - (a)->i * (b)->i; \ ci = (a)->i * (b)->r + (a)->r * (b)->i; \ (c)->r = cr; \ (c)->i = ci; \ } /*! \brief Complex equality testing */ #define z_eq(a, b) ( (a)->r == (b)->r && (a)->i == (b)->i ) #ifdef __cplusplus extern "C" { #endif /* Prototypes for functions in dcomplex.c */ void slud_z_div(doublecomplex *, doublecomplex *, doublecomplex *); double slud_z_abs(doublecomplex *); /* exact */ double slud_z_abs1(doublecomplex *); /* approximate */ #ifdef __cplusplus } #endif #endif /* __SUPERLU_DCOMPLEX */ SuperLU_DIST_5.3.0/SRC/superlu_timer.c0000644013363400111340000000260013233431301016324 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. 
of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Returns the time in seconds used by the process * *
 * Purpose
 * ======= 
 *	Returns the time in seconds used by the process.
 *
 * Note: the timer function call is machine dependent. Use conditional
 *       compilation to choose the appropriate function.
 * 
*/ #include "superlu_defs.h" #ifdef SUN /* * It uses the system call gethrtime(3C), which is accurate to * nanoseconds. */ #include double SuperLU_timer_() { return ( (double)gethrtime() / 1e9 ); } #elif defined ( UNIX_TIMER ) #include #include #include #include double SuperLU_timer_() { struct tms use; double tmp; int clocks_per_sec = sysconf(_SC_CLK_TCK); times(&use); tmp = use.tms_utime; tmp += use.tms_stime; return (double)(tmp) / clocks_per_sec; } #elif _WIN32 #include double SuperLU_timer_() { clock_t t; t=clock(); return ((double)t)/CLOCKS_PER_SEC; } #else #include double SuperLU_timer_() { return MPI_Wtime(); } #endif SuperLU_DIST_5.3.0/SRC/superlu_dist_config.h.in0000644013363400111340000000034713233431301020114 0ustar xiaoyessg/* superlu_dist_config.h.in */ /* Enable parmetis */ #cmakedefine HAVE_PARMETIS @HAVE_PARMETIS@ /* enable 64bit index mode */ #cmakedefine XSDK_INDEX_SIZE @XSDK_INDEX_SIZE@ #if (XSDK_INDEX_SIZE == 64) #define _LONGINT 1 #endif SuperLU_DIST_5.3.0/SRC/pzgstrf_irecv.c0000644013363400111340000011716713233431301016333 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs LU factorization in parallel * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *
 *
 * Sketch of the algorithm
 * =======================
 *
 * The following relations hold:
 *     * A_kk = L_kk * U_kk
 *     * L_ik = A_ik * U_kk^(-1)
 *     * U_kj = L_kk^(-1) * A_kj
 *
 *              ----------------------------------
 *              |   |                            |
 *              ----|-----------------------------
 *              |   | \ U_kk|                    |
 *              |   |   \   |        U_kj        |
 *              |   |L_kk \ |         ||         |
 *              ----|-------|---------||----------
 *              |   |       |         \/         |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   | L_ik ==>       A_ij        |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              ----------------------------------
 *
 * Handle the first block of columns separately.
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity. ( pzgstrf2(0), one column at a time )
 *     * Compute block row of U
 *     * Update trailing matrix
 * 
 * Loop over the remaining blocks of columns.
 *   mycol = MYCOL( iam, grid );
 *   myrow = MYROW( iam, grid );
 *   N = nsupers;
 *   For (k = 1; k < N; ++k) {
 *       krow = PROW( k, grid );
 *       kcol = PCOL( k, grid );
 *       Pkk = PNUM( krow, kcol, grid );
 *
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity.
 *       if ( mycol == kcol ) {
 *           pzgstrf2(k), one column at a time 
 *       }
 *
 *     * Parallel triangular solve
 *       if ( iam == Pkk ) multicast L_k,k to this process row;
 *       if ( myrow == krow && mycol != kcol ) {
 *          Recv L_k,k from process Pkk;
 *          for (j = k+1; j < N; ++j) 
 *              if ( PCOL( j, grid ) == mycol && A_k,j != 0 )
 *                 U_k,j = L_k,k \ A_k,j;
 *       }
 *
 *     * Parallel rank-k update
 *       if ( myrow == krow ) multicast U_k,k+1:N to this process column;
 *       if ( mycol == kcol ) multicast L_k+1:N,k to this process row;
 *       if ( myrow != krow ) {
 *          Pkj = PNUM( krow, mycol, grid );
 *          Recv U_k,k+1:N from process Pkj;
 *       }
 *       if ( mycol != kcol ) {
 *          Pik = PNUM( myrow, kcol, grid );
 *          Recv L_k+1:N,k from process Pik;
 *       }
 *       for (j = k+1; j < N; ++j) {
 *          for (i = k+1; i < N; ++i) 
 *              if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid )
 *                   && L_i,k != 0 && U_k,j != 0 )
 *                 A_i,j = A_i,j - L_i,k * U_k,j;
 *       }
 *  }
 *
 *
 * Remaining issues
 *   (1) Use local indices for L subscripts and SPA.  [DONE]
 * 
*/ #include #include "superlu_zdefs.h" /* * Internal prototypes */ static void pzgstrf2(superlu_options_t *, int_t, double, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, int *); #ifdef _CRAY static void pzgstrs2(int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, _fcd, _fcd, _fcd); #else static void pzgstrs2(int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *); #endif /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *
 *  PZGSTRF performs the LU factorization in parallel.
 *
 * Arguments
 * =========
 * 
 * options (input) superlu_options_t*
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following field should be defined:
 *         o ReplaceTinyPivot (yes_no_t)
 *           Specifies whether to replace the tiny diagonals by
 *           sqrt(epsilon)*norm(A) during LU factorization.
 *
 * m      (input) int
 *        Number of rows in the matrix.
 *
 * n      (input) int
 *        Number of columns in the matrix.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         The following fields should be defined:
 *
 *         o Glu_persist (input) Glu_persist_t*
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *	       xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (input/output) LocalLU_t*
 *           The distributed data structures to store L and U factors.
 *           See superlu_zdefs.h for the definition of 'LocalLU_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_zdefs.h for the definition of 'gridinfo_t'.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ int_t pzgstrf /************************************************************************/ ( superlu_options_t *options, int m, int n, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, int *info ) { #ifdef _CRAY _fcd ftcs = _cptofcd("N", strlen("N")); _fcd ftcs1 = _cptofcd("L", strlen("L")); _fcd ftcs2 = _cptofcd("N", strlen("N")); _fcd ftcs3 = _cptofcd("U", strlen("U")); #endif doublecomplex zero = {0.0, 0.0}; doublecomplex alpha = {1.0, 0.0}, beta = {0.0, 0.0}; int_t *xsup; int_t *lsub, *lsub1, *usub, *Usub_buf, *Lsub_buf_2[2]; /* Need 2 buffers to implement Irecv. */ doublecomplex *lusup, *lusup1, *uval, *Uval_buf, *Lval_buf_2[2]; /* Need 2 buffers to implement Irecv. */ int_t fnz, i, ib, ijb, ilst, it, iukp, jb, jj, klst, knsupc, lb, lib, ldv, ljb, lptr, lptr0, lptrj, luptr, luptr0, luptrj, nlb, nub, nsupc, rel, rukp; int_t Pc, Pr; int iam, kcol, krow, mycol, myrow, pi, pj; int j, k, lk, nsupers; int nsupr, nbrow, segsize; int msgcnt[4]; /* Count the size of the message xfer'd in each buffer: * 0 : transferred in Lsub_buf[] * 1 : transferred in Lval_buf[] * 2 : transferred in Usub_buf[] * 3 : transferred in Uval_buf[] */ int_t msg0, msg2; int_t **Ufstnz_br_ptr, **Lrowind_bc_ptr; doublecomplex **Unzval_br_ptr, **Lnzval_bc_ptr; int_t *index; doublecomplex *nzval; int_t *iuip, *ruip;/* Pointers to U index/nzval; size ceil(NSUPERS/Pr). 
*/ doublecomplex *ucol; int_t *indirect; doublecomplex *tempv, *tempv2d; int_t iinfo; int_t *ToRecv, *ToSendD, **ToSendR; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; superlu_scope_t *scp; float s_eps; double thresh; doublecomplex *tempU2d, *tempu; int full, ldt, ldu, lead_zero, ncols; MPI_Request recv_req[4], *send_req; MPI_Status status; #if ( DEBUGlevel>=2 ) int_t num_copy=0, num_update=0; #endif #if ( PRNTlevel==3 ) int_t zero_msg = 0, total_msg = 0; #endif #if ( PROFlevel>=1 ) double t1, t2; float msg_vol = 0, msg_cnt = 0; int_t iword = sizeof(int_t), zword = sizeof(doublecomplex); #endif /* Test the input parameters. */ *info = 0; if ( m < 0 ) *info = -2; else if ( n < 0 ) *info = -3; if ( *info ) { pxerbla("pzgstrf", grid, -*info); return (-1); } /* Quick return if possible. */ if ( m == 0 || n == 0 ) return 0; /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; s_eps = slamch_("Epsilon"); thresh = s_eps * anorm; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgstrf()"); #endif stat->ops[FACT] = 0.0; if ( Pr*Pc > 1 ) { i = Llu->bufmax[0]; if ( !(Llu->Lsub_buf_2[0] = intMalloc_dist(2 * ((size_t)i))) ) ABORT("Malloc fails for Lsub_buf."); Llu->Lsub_buf_2[1] = Llu->Lsub_buf_2[0] + i; i = Llu->bufmax[1]; if ( !(Llu->Lval_buf_2[0] = doublecomplexMalloc_dist(2 * ((size_t)i))) ) ABORT("Malloc fails for Lval_buf[]."); Llu->Lval_buf_2[1] = Llu->Lval_buf_2[0] + i; if ( Llu->bufmax[2] != 0 ) if ( !(Llu->Usub_buf = intMalloc_dist(Llu->bufmax[2])) ) ABORT("Malloc fails for Usub_buf[]."); if ( Llu->bufmax[3] != 0 ) if ( !(Llu->Uval_buf = doublecomplexMalloc_dist(Llu->bufmax[3])) ) ABORT("Malloc fails for Uval_buf[]."); if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*Pc*sizeof(MPI_Request)))) ABORT("Malloc fails for send_req[]."); } if ( !(Llu->ujrow = 
doublecomplexMalloc_dist(sp_ienv_dist(3))) ) ABORT("Malloc fails for ujrow[]."); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. thresh = s_eps %e * anorm %e = %e\n", s_eps, anorm, thresh); printf(".. Buffer size: Lsub %d\tLval %d\tUsub %d\tUval %d\tLDA %d\n", Llu->bufmax[0], Llu->bufmax[1], Llu->bufmax[2], Llu->bufmax[3], Llu->bufmax[4]); } #endif Lsub_buf_2[0] = Llu->Lsub_buf_2[0]; Lsub_buf_2[1] = Llu->Lsub_buf_2[1]; Lval_buf_2[0] = Llu->Lval_buf_2[0]; Lval_buf_2[1] = Llu->Lval_buf_2[1]; Usub_buf = Llu->Usub_buf; Uval_buf = Llu->Uval_buf; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; ToRecv = Llu->ToRecv; ToSendD = Llu->ToSendD; ToSendR = Llu->ToSendR; ldt = sp_ienv_dist(3); /* Size of maximum supernode */ if ( !(tempv2d = doublecomplexCalloc_dist(2*((size_t)ldt)*ldt)) ) ABORT("Calloc fails for tempv2d[]."); tempU2d = tempv2d + ldt*ldt; if ( !(indirect = intMalloc_dist(ldt)) ) ABORT("Malloc fails for indirect[]."); k = CEILING( nsupers, Pr ); /* Number of local block rows */ if ( !(iuip = intMalloc_dist(k)) ) ABORT("Malloc fails for iuip[]."); if ( !(ruip = intMalloc_dist(k)) ) ABORT("Malloc fails for ruip[]."); /* --------------------------------------------------------------- Handle the first block column separately to start the pipeline. --------------------------------------------------------------- */ if ( mycol == 0 ) { pzgstrf2(options, 0, thresh, Glu_persist, grid, Llu, stat, info); scp = &grid->rscp; /* The scope of process row. */ /* Process column *kcol* multicasts numeric values of L(:,k) to process rows. 
*/ lsub = Lrowind_bc_ptr[0]; lusup = Lnzval_bc_ptr[0]; if ( lsub ) { msgcnt[0] = lsub[1] + BC_HEADER + lsub[0]*LB_DESCRIPTOR; msgcnt[1] = lsub[1] * SuperSize( 0 ); } else { msgcnt[0] = msgcnt[1] = 0; } for (pj = 0; pj < Pc; ++pj) { if ( ToSendR[0][pj] != EMPTY ) { #if ( PROFlevel>=1 ) TIC(t1); #endif MPI_Isend( lsub, msgcnt[0], mpi_int_t, pj, 0, scp->comm, &send_req[pj] ); MPI_Isend( lusup, msgcnt[1], SuperLU_MPI_DOUBLE_COMPLEX, pj, 1, scp->comm, &send_req[pj+Pc] ); #if ( DEBUGlevel>=2 ) printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, 0, msgcnt[0], msgcnt[1], pj); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[0]*iword + msgcnt[1]*zword; #endif } } /* for pj ... */ } else { /* Post immediate receives. */ if ( ToRecv[0] >= 1 ) { /* Recv block column L(:,0). */ scp = &grid->rscp; /* The scope of process row. */ MPI_Irecv( Lsub_buf_2[0], Llu->bufmax[0], mpi_int_t, 0, 0, scp->comm, &recv_req[0] ); MPI_Irecv( Lval_buf_2[0], Llu->bufmax[1], SuperLU_MPI_DOUBLE_COMPLEX, 0, 1, scp->comm, &recv_req[1] ); #if ( DEBUGlevel>=2 ) printf("(%d) Post Irecv L(:,%4d)\n", iam, 0); #endif } } /* if mycol == 0 */ /* ------------------------------------------ MAIN LOOP: Loop through all block columns. ------------------------------------------ */ for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( mycol == kcol ) { lk = LBj( k, grid ); /* Local block number. */ for (pj = 0; pj < Pc; ++pj) { /* Wait for Isend to complete before using lsub/lusup. */ if ( ToSendR[lk][pj] != EMPTY ) { MPI_Wait( &send_req[pj], &status ); MPI_Wait( &send_req[pj+Pc], &status ); } } lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; } else { if ( ToRecv[k] >= 1 ) { /* Recv block column L(:,k). */ scp = &grid->rscp; /* The scope of process row. 
*/ #if ( PROFlevel>=1 ) TIC(t1); #endif /*probe_recv(iam, kcol, (4*k)%NTAGS, mpi_int_t, scp->comm, Llu->bufmax[0]);*/ /*MPI_Recv( Lsub_buf, Llu->bufmax[0], mpi_int_t, kcol, (4*k)%NTAGS, scp->comm, &status );*/ MPI_Wait( &recv_req[0], &status ); MPI_Get_count( &status, mpi_int_t, &msgcnt[0] ); /*probe_recv(iam, kcol, (4*k+1)%NTAGS, SuperLU_MPI_DOUBLE_COMPLEX, scp->comm, Llu->bufmax[1]);*/ /*MPI_Recv( Lval_buf, Llu->bufmax[1], SuperLU_MPI_DOUBLE_COMPLEX, kcol, (4*k+1)%NTAGS, scp->comm, &status );*/ MPI_Wait( &recv_req[1], &status ); MPI_Get_count( &status, SuperLU_MPI_DOUBLE_COMPLEX, &msgcnt[1] ); #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Recv L(:,%4d): lsub %4d, lusup %4d from Pc %2d\n", iam, k, msgcnt[0], msgcnt[1], kcol); fflush(stdout); #endif lsub = Lsub_buf_2[k%2]; lusup = Lval_buf_2[k%2]; #if ( PRNTlevel==3 ) ++total_msg; if ( !msgcnt[0] ) ++zero_msg; #endif } else msgcnt[0] = 0; } /* if mycol = Pc(k) */ scp = &grid->cscp; /* The scope of process column. */ if ( myrow == krow ) { /* Parallel triangular solve across process row *krow* -- U(k,j) = L(k,k) \ A(k,j). */ #ifdef _CRAY pzgstrs2(n, k, Glu_persist, grid, Llu, stat, ftcs1, ftcs2, ftcs3); #else pzgstrs2(n, k, Glu_persist, grid, Llu, stat); #endif /* Multicasts U(k,:) to process columns. */ lk = LBi( k, grid ); usub = Ufstnz_br_ptr[lk]; uval = Unzval_br_ptr[lk]; if ( usub ) { msgcnt[2] = usub[2]; msgcnt[3] = usub[1]; } else { msgcnt[2] = msgcnt[3] = 0; } if ( ToSendD[lk] == YES ) { for (pi = 0; pi < Pr; ++pi) { if ( pi != myrow ) { #if ( PROFlevel>=1 ) TIC(t1); #endif MPI_Send( usub, msgcnt[2], mpi_int_t, pi, (4*k+2)%NTAGS, scp->comm); MPI_Send( uval, msgcnt[3], SuperLU_MPI_DOUBLE_COMPLEX, pi, (4*k+3)%NTAGS, scp->comm); #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[2]*iword + msgcnt[3]*zword; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Send U(%4d,:) to Pr %2d\n", iam, k, pi); #endif } /* if pi ... 
*/ } /* for pi ... */ } /* if ToSendD ... */ } else { /* myrow != krow */ if ( ToRecv[k] == 2 ) { /* Recv block row U(k,:). */ #if ( PROFlevel>=1 ) TIC(t1); #endif /*probe_recv(iam, krow, (4*k+2)%NTAGS, mpi_int_t, scp->comm, Llu->bufmax[2]);*/ MPI_Recv( Usub_buf, Llu->bufmax[2], mpi_int_t, krow, (4*k+2)%NTAGS, scp->comm, &status ); MPI_Get_count( &status, mpi_int_t, &msgcnt[2] ); /*probe_recv(iam, krow, (4*k+3)%NTAGS, SuperLU_MPI_DOUBLE_COMPLEX, scp->comm, Llu->bufmax[3]);*/ MPI_Recv( Uval_buf, Llu->bufmax[3], SuperLU_MPI_DOUBLE_COMPLEX, krow, (4*k+3)%NTAGS, scp->comm, &status ); MPI_Get_count( &status, SuperLU_MPI_DOUBLE_COMPLEX, &msgcnt[3] ); #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; #endif usub = Usub_buf; uval = Uval_buf; #if ( DEBUGlevel>=2 ) printf("(%d) Recv U(%4d,:) from Pr %2d\n", iam, k, krow); #endif #if ( PRNTlevel==3 ) ++total_msg; if ( !msgcnt[2] ) ++zero_msg; #endif } else msgcnt[2] = 0; } /* if myrow == Pr(k) */ /* * Parallel rank-k update; pair up blocks L(i,k) and U(k,j). * for (j = k+1; k < N; ++k) { * for (i = k+1; i < N; ++i) * if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid ) * && L(i,k) != 0 && U(k,j) != 0 ) * A(i,j) = A(i,j) - L(i,k) * U(k,j); */ msg0 = msgcnt[0]; msg2 = msgcnt[2]; if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */ nsupr = lsub[1]; /* LDA of lusup. */ if ( myrow == krow ) { /* Skip diagonal block L(k,k). */ lptr0 = BC_HEADER + LB_DESCRIPTOR + lsub[BC_HEADER+1]; luptr0 = knsupc; nlb = lsub[0] - 1; } else { lptr0 = BC_HEADER; luptr0 = 0; nlb = lsub[0]; } lptr = lptr0; for (lb = 0; lb < nlb; ++lb) { /* Initialize block row pointers. 
*/ ib = lsub[lptr]; lib = LBi( ib, grid ); iuip[lib] = BR_HEADER; ruip[lib] = 0; lptr += LB_DESCRIPTOR + lsub[lptr+1]; } nub = usub[0]; /* Number of blocks in the block row U(k,:) */ iukp = BR_HEADER; /* Skip header; Pointer to index[] of U(k,:) */ rukp = 0; /* Pointer to nzval[] of U(k,:) */ klst = FstBlockC( k+1 ); /* --------------------------------------------------- Update the first block column A(:,k+1). --------------------------------------------------- */ jb = usub[iukp]; /* Global block number of block U(k,j). */ if ( jb == k+1 ) { /* First update (k+1)-th block. */ --nub; lptr = lptr0; luptr = luptr0; ljb = LBj( jb, grid ); /* Local block number of U(k,j). */ nsupc = SuperSize( jb ); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ /* Prepare to call DGEMM. */ jj = iukp; while ( usub[jj] == klst ) ++jj; ldu = klst - usub[jj++]; ncols = 1; full = 1; for (; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { ++ncols; if ( segsize != ldu ) full = 0; if ( segsize > ldu ) ldu = segsize; } } #if ( DEBUGlevel>=3 ) ++num_update; #endif if ( full ) { tempu = &uval[rukp]; } else { /* Copy block U(k,j) into tempU2d. */ #if ( DEBUGlevel>=3 ) printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n", iam, full, k, jb, ldu, ncols, nsupc); ++num_copy; #endif tempu = tempU2d; for (jj = iukp; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { lead_zero = ldu - segsize; for (i = 0; i < lead_zero; ++i) tempu[i] = zero; tempu += lead_zero; for (i = 0; i < segsize; ++i) tempu[i] = uval[rukp+i]; rukp += segsize; tempu += segsize; } } tempu = tempU2d; rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */ } /* if full ... */ for (lb = 0; lb < nlb; ++lb) { ib = lsub[lptr]; /* Row block L(i,k). */ nbrow = lsub[lptr+1]; /* Number of full rows. */ lptr += LB_DESCRIPTOR; /* Skip descriptor. 
*/ tempv = tempv2d; #ifdef _CRAY CGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #elif defined (USE_VENDOR_BLAS) zgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt, 1, 1); #else zgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #endif stat->ops[FACT] += 8 * nbrow * ldu * ncols; /* Now gather the result into the destination block. */ if ( ib < jb ) { /* A(i,j) is in U. */ ilst = FstBlockC( ib+1 ); lib = LBi( ib, grid ); index = Ufstnz_br_ptr[lib]; ijb = index[iuip[lib]]; while ( ijb < jb ) { /* Search for dest block. */ ruip[lib] += index[iuip[lib]+1]; iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb ); ijb = index[iuip[lib]]; } iuip[lib] += UB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0, it = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; z_sub(&ucol[rel], &ucol[rel], &tempv[it]); ++it; } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. */ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! */ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted. 
*/ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) { /*#pragma _CRI cache_bypass nzval,tempv*/ for (it = 0, i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; z_sub(&nzval[indirect[rel]], &nzval[indirect[rel]], &tempv[it]); ++it; } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* if jb == k+1 */ } /* if L(:,k) and U(k,:) not empty */ if ( k+1 < nsupers ) { kcol = PCOL( k+1, grid ); if ( mycol == kcol ) { /* Factor diagonal and subdiagonal blocks and test for exact singularity. */ pzgstrf2(options, k+1, thresh, Glu_persist, grid, Llu, stat, info); /* Process column *kcol+1* multicasts numeric values of L(:,k+1) to process rows. */ lk = LBj( k+1, grid ); /* Local block number. */ lsub1 = Lrowind_bc_ptr[lk]; if ( lsub1 ) { msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0]*LB_DESCRIPTOR; msgcnt[1] = lsub1[1] * SuperSize( k+1 ); } else { msgcnt[0] = 0; msgcnt[1] = 0; } scp = &grid->rscp; /* The scope of process row. */ for (pj = 0; pj < Pc; ++pj) { if ( ToSendR[lk][pj] != EMPTY ) { lusup1 = Lnzval_bc_ptr[lk]; #if ( PROFlevel>=1 ) TIC(t1); #endif MPI_Isend( lsub1, msgcnt[0], mpi_int_t, pj, (4*(k+1))%NTAGS, scp->comm, &send_req[pj] ); MPI_Isend( lusup1, msgcnt[1], SuperLU_MPI_DOUBLE_COMPLEX, pj, (4*(k+1)+1)%NTAGS, scp->comm, &send_req[pj+Pc] ); #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[0]*iword + msgcnt[1]*zword; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, k+1, msgcnt[0], msgcnt[1], pj); #endif } } /* for pj ... */ } else { /* Post Recv of block column L(:,k+1). 
*/ if ( ToRecv[k+1] >= 1 ) { scp = &grid->rscp; /* The scope of process row. */ MPI_Irecv(Lsub_buf_2[(k+1)%2], Llu->bufmax[0], mpi_int_t, kcol, (4*(k+1))%NTAGS, scp->comm, &recv_req[0]); MPI_Irecv(Lval_buf_2[(k+1)%2], Llu->bufmax[1], SuperLU_MPI_DOUBLE_COMPLEX, kcol, (4*(k+1)+1)%NTAGS, scp->comm, &recv_req[1]); #if ( DEBUGlevel>=2 ) printf("(%d) Post Irecv L(:,%4d)\n", iam, k+1); #endif } } /* if mycol == Pc(k+1) */ } /* if k+1 < nsupers */ if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */ /* --------------------------------------------------- Update all other blocks using block row U(k,:) --------------------------------------------------- */ for (j = 0; j < nub; ++j) { lptr = lptr0; luptr = luptr0; jb = usub[iukp]; /* Global block number of block U(k,j). */ ljb = LBj( jb, grid ); /* Local block number of U(k,j). */ nsupc = SuperSize( jb ); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ /* Prepare to call DGEMM. */ jj = iukp; while ( usub[jj] == klst ) ++jj; ldu = klst - usub[jj++]; ncols = 1; full = 1; for (; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { ++ncols; if ( segsize != ldu ) full = 0; if ( segsize > ldu ) ldu = segsize; } } #if ( DEBUGlevel>=3 ) printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n", iam, full, k, jb, ldu, ncols, nsupc); ++num_update; #endif if ( full ) { tempu = &uval[rukp]; } else { /* Copy block U(k,j) into tempU2d. */ #if ( DEBUGlevel>=3 ) ++num_copy; #endif tempu = tempU2d; for (jj = iukp; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { lead_zero = ldu - segsize; for (i = 0; i < lead_zero; ++i) tempu[i] = zero; tempu += lead_zero; for (i = 0; i < segsize; ++i) tempu[i] = uval[rukp+i]; rukp += segsize; tempu += segsize; } } tempu = tempU2d; rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */ } /* if full ... */ for (lb = 0; lb < nlb; ++lb) { ib = lsub[lptr]; /* Row block L(i,k). */ nbrow = lsub[lptr+1]; /* Number of full rows. 
*/ lptr += LB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; #ifdef _CRAY CGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #elif defined (USE_VENDOR_BLAS) zgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt, 1, 1); #else zgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #endif stat->ops[FACT] += 8 * nbrow * ldu * ncols; /* Now gather the result into the destination block. */ if ( ib < jb ) { /* A(i,j) is in U. */ ilst = FstBlockC( ib+1 ); lib = LBi( ib, grid ); index = Ufstnz_br_ptr[lib]; ijb = index[iuip[lib]]; while ( ijb < jb ) { /* Search for dest block. */ ruip[lib] += index[iuip[lib]+1]; iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb ); ijb = index[iuip[lib]]; } /* Skip descriptor. Now point to fstnz index of block U(i,j). */ iuip[lib] += UB_DESCRIPTOR; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0 ; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; z_sub(&ucol[rel], &ucol[rel], &tempv[i]); } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. */ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! */ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted for the L blocks. 
*/ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) { /*#pragma _CRI cache_bypass nzval,tempv*/ for (i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; z_sub(&nzval[indirect[rel]], &nzval[indirect[rel]], &tempv[i]); } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* for j ... */ } /* if k L(:,k) and U(k,:) are not empty */ } /* ------------------------------------------ END MAIN LOOP: for k = ... ------------------------------------------ */ if ( Pr*Pc > 1 ) { SUPERLU_FREE(Lsub_buf_2[0]); /* also free Lsub_buf_2[1] */ SUPERLU_FREE(Lval_buf_2[0]); /* also free Lval_buf_2[1] */ if ( Llu->bufmax[2] != 0 ) SUPERLU_FREE(Usub_buf); if ( Llu->bufmax[3] != 0 ) SUPERLU_FREE(Uval_buf); SUPERLU_FREE(send_req); } SUPERLU_FREE(Llu->ujrow); SUPERLU_FREE(tempv2d); SUPERLU_FREE(indirect); SUPERLU_FREE(iuip); SUPERLU_FREE(ruip); /* Prepare error message. 
*/ if ( *info == 0 ) *info = n + 1; #if ( PROFlevel>=1 ) TIC(t1); #endif MPI_Allreduce( info, &iinfo, 1, mpi_int_t, MPI_MIN, grid->comm ); #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; { float msg_vol_max, msg_vol_sum, msg_cnt_max, msg_cnt_sum; MPI_Reduce( &msg_cnt, &msg_cnt_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_cnt, &msg_cnt_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); if ( !iam ) { printf("\tPZGSTRF comm stat:" "\tAvg\tMax\t\tAvg\tMax\n" "\t\t\tCount:\t%.0f\t%.0f\tVol(MB)\t%.2f\t%.2f\n", msg_cnt_sum/Pr/Pc, msg_cnt_max, msg_vol_sum/Pr/Pc*1e-6, msg_vol_max*1e-6); } } #endif if ( iinfo == n + 1 ) *info = 0; else *info = iinfo; #if ( PRNTlevel==3 ) MPI_Allreduce( &zero_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. # msg of zero size\t%d\n", iinfo); MPI_Allreduce( &total_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. # total msg\t%d\n", iinfo); #endif #if ( DEBUGlevel>=2 ) for (i = 0; i < Pr * Pc; ++i) { if ( iam == i ) { zPrintLblocks(iam, nsupers, grid, Glu_persist, Llu); zPrintUblocks(iam, nsupers, grid, Glu_persist, Llu); printf("(%d)\n", iam); PrintInt10("Recv", nsupers, Llu->ToRecv); } MPI_Barrier( grid->comm ); } #endif #if ( DEBUGlevel>=3 ) printf("(%d) num_copy=%d, num_update=%d\n", iam, num_copy, num_update); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgstrf()"); #endif } /* PZGSTRF */ /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   Factor diagonal and subdiagonal blocks and test for exact singularity.
 *   Only the process column that owns block column *k* participates
 *   in the work.
 * 
 * Arguments
 * =========
 *
 * k      (input) int (global)
 *        The column number of the block column to be factorized.
 *
 * thresh (input) double (global)
 *        The threshold value = s_eps * anorm.
 *
 * Glu_persist (input) Glu_persist_t*
 *        Global data structures (xsup, supno) replicated on all processes.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Llu    (input/output) LocalLU_t*
 *        Local data structures to store distributed L and U matrices.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the factorization.
 *        See SuperLUStat_t structure defined in util.h.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ static void pzgstrf2 /************************************************************************/ ( superlu_options_t *options, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, int* info ) { int c, iam, l, pkk; int incx = 1, incy = 1; int nsupr; /* number of rows in the block (LDA) */ int luptr; int_t i, krow, j, jfst, jlst; int_t nsupc; /* number of columns in the block */ int_t *xsup = Glu_persist->xsup; doublecomplex *lusup, temp; doublecomplex *ujrow; doublecomplex one = {1.0, 0.0}, alpha = {-1.0, 0.0}; *info = 0; /* Quick return. */ /* Initialization. */ iam = grid->iam; krow = PROW( k, grid ); pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); j = LBj( k, grid ); /* Local block number */ jfst = FstBlockC( k ); jlst = FstBlockC( k+1 ); lusup = Llu->Lnzval_bc_ptr[j]; nsupc = SuperSize( k ); if ( Llu->Lrowind_bc_ptr[j] ) nsupr = Llu->Lrowind_bc_ptr[j][1]; ujrow = Llu->ujrow; luptr = 0; /* Point to the diagonal entries. */ c = nsupc; for (j = 0; j < jlst - jfst; ++j) { /* Broadcast the j-th row (nsupc - j) elements to the process column. */ if ( iam == pkk ) { /* Diagonal process. */ i = luptr; if ( options->ReplaceTinyPivot == YES ) { if ( z_abs1(&lusup[i]) < thresh ) { /* Diagonal */ #if ( PRNTlevel>=2 ) printf("(%d) .. col %d, tiny pivot %e ", iam, jfst+j, lusup[i]); #endif /* Keep the replaced diagonal with the same sign. 
*/ if ( lusup[i].r < 0 ) lusup[i].r = -thresh; else lusup[i].r = thresh; lusup[i].i = 0.0; #if ( PRNTlevel>=2 ) printf("replaced by %e\n", lusup[i]); #endif ++(stat->TinyPivots); } } for (l = 0; l < c; ++l, i += nsupr) ujrow[l] = lusup[i]; } #if 0 dbcast_col(ujrow, c, pkk, UjROW, grid, &c); #else MPI_Bcast(ujrow, c, SuperLU_MPI_DOUBLE_COMPLEX, krow, (grid->cscp).comm); /*bcast_tree(ujrow, c, SuperLU_MPI_DOUBLE_COMPLEX, krow, (24*k+j)%NTAGS, grid, COMM_COLUMN, &c);*/ #endif #if ( DEBUGlevel>=2 ) if ( k == 3329 && j == 2 ) { if ( iam == pkk ) { printf("..(%d) k %d, j %d: Send ujrow[0] %e\n",iam,k,j,ujrow[0]); } else { printf("..(%d) k %d, j %d: Recv ujrow[0] %e\n",iam,k,j,ujrow[0]); } } #endif if ( !lusup ) { /* Empty block column. */ --c; if ( ujrow[0].r == 0.0 && ujrow[0].i == 0.0 ) *info = j+jfst+1; continue; } /* Test for singularity. */ if ( ujrow[0].r == 0.0 && ujrow[0].i == 0.0 ) { *info = j+jfst+1; } else { /* Scale the j-th column of the matrix. */ z_div(&temp, &one, &ujrow[0]); if ( iam == pkk ) { for (i = luptr+1; i < luptr-j+nsupr; ++i) zz_mult(&lusup[i], &lusup[i], &temp); stat->ops[FACT] += 6*(nsupr-j-1) + 10; } else { for (i = luptr; i < luptr+nsupr; ++i) zz_mult(&lusup[i], &lusup[i], &temp); stat->ops[FACT] += 6*nsupr + 10; } } /* Rank-1 update of the trailing submatrix. */ if ( --c ) { if ( iam == pkk ) { l = nsupr - j - 1; #ifdef _CRAY CGERU(&l, &c, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr); #else zgeru_(&l, &c, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr); #endif stat->ops[FACT] += 8 * l * c; } else { #ifdef _CRAY CGERU(&nsupr, &c, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr); #else zgeru_(&nsupr, &c, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr); #endif stat->ops[FACT] += 8 * nsupr * c; } } /* Move to the next column. */ if ( iam == pkk ) luptr += nsupr + 1; else luptr += nsupr; } /* for j ... 
*/ } /* PZGSTRF2 */ /************************************************************************/ static void pzgstrs2 /************************************************************************/ #ifdef _CRAY ( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3 ) #else ( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat ) #endif /* * Purpose * ======= * Perform parallel triangular solves * U(k,:) := A(k,:) \ L(k,k). * Only the process row that owns block row *k* participates * in the work. * * Arguments * ========= * * m (input) int (global) * Number of rows in the matrix. * * k (input) int (global) * The row number of the block row to be factorized. * * Glu_persist (input) Glu_persist_t* * Global data structures (xsup, supno) replicated on all processes. * * grid (input) gridinfo_t* * The 2D process mesh. * * Llu (input/output) LocalLU_t* * Local data structures to store distributed L and U matrices. * * stat (output) SuperLUStat_t* * Record the statistics about the factorization; * See SuperLUStat_t structure defined in util.h. * */ { int iam, pkk; int incx = 1; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int segsize; int_t nsupc; /* number of columns in the block */ int_t luptr, iukp, rukp; int_t b, gb, j, klst, knsupc, lk, nb; int_t *xsup = Glu_persist->xsup; int_t *usub; doublecomplex *lusup, *uval; /* Quick return. */ lk = LBi( k, grid ); /* Local block number */ if ( !Llu->Unzval_br_ptr[lk] ) return; /* Initialization. 
*/ iam = grid->iam; pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); klst = FstBlockC( k+1 ); knsupc = SuperSize( k ); usub = Llu->Ufstnz_br_ptr[lk]; /* index[] of block row U(k,:) */ uval = Llu->Unzval_br_ptr[lk]; nb = usub[0]; iukp = BR_HEADER; rukp = 0; if ( iam == pkk ) { lk = LBj( k, grid ); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; } else { nsupr = Llu->Lsub_buf_2[k%2][1]; /* LDA of lusup[] */ lusup = Llu->Lval_buf_2[k%2]; } /* Loop through all the row blocks. */ for (b = 0; b < nb; ++b) { gb = usub[iukp]; nsupc = SuperSize( gb ); iukp += UB_DESCRIPTOR; /* Loop through all the segments in the block. */ for (j = 0; j < nsupc; ++j) { segsize = klst - usub[iukp++]; if ( segsize ) { /* Nonzero segment. */ luptr = (knsupc - segsize) * (nsupr + 1); #ifdef _CRAY CTRSV(ftcs1, ftcs2, ftcs3, &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #elif defined (USE_VENDOR_BLAS) ztrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx, 1, 1, 1); #else ztrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #endif stat->ops[FACT] += 4 * segsize * (segsize + 1) + 10 * segsize; /* complex division */ rukp += segsize; } } } /* for b ... */ } /* PZGSTRS2 */ static int probe_recv(int iam, int source, int tag, MPI_Datatype datatype, MPI_Comm comm, int buf_size) { MPI_Status status; int count; MPI_Probe( source, tag, comm, &status ); MPI_Get_count( &status, datatype, &count ); if ( count > buf_size ) { printf("(%d) Recv'ed count %d > buffer size $d\n", iam, count, buf_size); exit(-1); } return 0; } SuperLU_DIST_5.3.0/SRC/zreadtriple_noheader.c0000644013363400111340000001126713233431301017630 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * */ #include #include "superlu_zdefs.h" #undef EXPAND_SYM /*! brief * *
 * Output parameters
 * =================
 *   (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of
 *      nonzeros in columns of matrix A; (*nzval)[*] the numerical values;
 *	column i of A is given by (*nzval)[k], k = (*colptr)[i],...,
 *      (*colptr)[i+1]-1.
 * 
*/ void zreadtriple_noheader(FILE *fp, int_t *m, int_t *n, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { int_t i, j, k, jsize, lasta, nnz, nz, new_nonz, minn = 100; doublecomplex *a, *val, vali; int_t *asub, *xa, *row, *col; int zero_base = 0, ret_val = 0; /* File format: Triplet in a line for each nonzero entry: * row col value * or row col real_part imaginary_part */ /* First pass: determine N and NNZ */ nz = *n = 0; #ifdef _LONGINT ret_val = fscanf(fp, "%ld%ld%lf%lf\n", &i, &j, &vali.r, &vali.i); #else ret_val = fscanf(fp, "%d%d%lf%lf\n", &i, &j, &vali.r, &vali.i); #endif while (ret_val != EOF) { *n = SUPERLU_MAX(*n, i); *n = SUPERLU_MAX(*n, j); minn = SUPERLU_MIN(minn, i); minn = SUPERLU_MIN(minn, j); ++nz; #ifdef _LONGINT ret_val = fscanf(fp, "%ld%ld%lf%lf\n", &i, &j, &vali.r, &vali.i); #else ret_val = fscanf(fp, "%d%d%lf%lf\n", &i, &j, &vali.r, &vali.i); #endif } if ( minn == 0 ) { /* zero-based indexing */ zero_base = 1; ++(*n); printf("triplet file: row/col indices are zero-based.\n"); } else { printf("triplet file: row/col indices are one-based.\n"); } *m = *n; *nonz = nz; rewind(fp); #ifdef EXPAND_SYM new_nonz = 2 * *nonz - *n; #else new_nonz = *nonz; #endif /* Second pass: read the actual matrix values */ printf("m %ld, n %ld, nonz %ld\n", *m, *n, *nonz); zallocateA_dist(*n, new_nonz, nzval, rowind, colptr); /* Allocate storage */ a = *nzval; asub = *rowind; xa = *colptr; if ( !(val = (doublecomplex *) SUPERLU_MALLOC(new_nonz * sizeof(doublecomplex))) ) ABORT("Malloc fails for val[]"); if ( !(row = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for row[]"); if ( !(col = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for col[]"); for (j = 0; j < *n; ++j) xa[j] = 0; /* Read into the triplet array from a file */ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { #ifdef _LONGINT fscanf(fp, "%ld%ld%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); #else fscanf(fp, "%d%d%lf%lf\n", 
&row[nz], &col[nz], &val[nz].r, &val[nz].i); #endif if ( !zero_base ) { /* Change to 0-based indexing. */ --row[nz]; --col[nz]; } if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n /*|| val[nz] == 0.*/) { fprintf(stderr, "nz %d, (%d, %d) = %e out of bound, removed\n", nz, row[nz], col[nz], val[nz]); exit(-1); } else { ++xa[col[nz]]; #ifdef EXPAND_SYM if ( row[nz] != col[nz] ) { /* Excluding diagonal */ ++nz; row[nz] = col[nz-1]; col[nz] = row[nz-1]; val[nz] = val[nz-1]; ++xa[col[nz]]; } #endif ++nz; } } *nonz = nz; #ifdef EXPAND_SYM printf("new_nonz after symmetric expansion:\t%d\n", *nonz); #endif /* Initialize the array of column pointers */ k = 0; jsize = xa[0]; xa[0] = 0; for (j = 1; j < *n; ++j) { k += jsize; jsize = xa[j]; xa[j] = k; } /* Copy the triplets into the column oriented storage */ for (nz = 0; nz < *nonz; ++nz) { j = col[nz]; k = xa[j]; asub[k] = row[nz]; a[k] = val[nz]; ++xa[j]; } /* Reset the column pointers to the beginning of each column */ for (j = *n; j > 0; --j) xa[j] = xa[j-1]; xa[0] = 0; SUPERLU_FREE(val); SUPERLU_FREE(row); SUPERLU_FREE(col); #ifdef CHK_INPUT for (i = 0; i < *n; i++) { printf("Col %d, xa %d\n", i, xa[i]); for (k = xa[i]; k < xa[i+1]; k++) printf("%d\t%16.10f\n", asub[k], a[k]); } #endif } #if 0 void zreadrhs(int m, doublecomplex *b) { FILE *fp, *fopen(); int i, j; if ( !(fp = fopen("b.dat", "r")) ) { fprintf(stderr, "zreadrhs: file does not exist\n"); exit(-1); } for (i = 0; i < m; ++i) fscanf(fp, "%lf%lf\n", &(b[i].r), &(b[i].i)); fclose(fp); } #endif SuperLU_DIST_5.3.0/SRC/xerr_dist.c0000644013363400111340000000137113233431301015434 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief
 * -- Distributed SuperLU routine (version 4.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Modified: November 21, 1999
 *
*/

/* NOTE(review): the target of the first #include is missing in this copy of
   the file; the function below needs the declaration of printf. */
#include
#include "Cnames.h"

/* xerbla */
/*
 * xerr_dist: report an illegal argument.
 *
 * srname  name of the routine that detected the problem (printed with %6s).
 * info    pointer to the number of the offending parameter (printed with
 *         %2d).
 *
 * Writes one line to stdout and always returns 0; execution is not stopped
 * here.
 */
int xerr_dist(char *srname, int *info)
{
    printf("** On entry to %6s, parameter number %2d had an illegal value\n",
           srname, *info);
    return 0;
} /* xerr_dist */
SuperLU_DIST_5.3.0/SRC/psymbfact.c0000644013363400111340000050427313233431301015432 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */
/*! @file
 * \brief Implements parallel symbolic factorization
 *
 *
 * -- Parallel symbolic factorization routine  (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley - July 2003
 * INRIA France - January 2004
 * Laura Grigori
 *
 * November 1, 2007
 * February 20, 2008
 * October 15, 2008
 * January 28, 2018
 *
 * The function symbfact_dist implements the parallel symbolic factorization
 * algorithm described in the paper:
 *
 * Parallel Symbolic Factorization for Sparse LU with Static Pivoting,
 * Laura Grigori, James W. Demmel and Xiaoye S. Li,
 * Pages 1289-1314, SIAM Journal on Scientific Computing, Volume 29, Issue 3.
 * 
*/ /* limits.h: the largest positive integer (INT_MAX) */ #include #include #include "superlu_ddefs.h" #include "psymbfact.h" /* * Internal protypes */ static int_t * intMalloc_symbfact(int_t ); static int_t * intCalloc_symbfact(int_t ); static int_t initParmsAndStats (psymbfact_stat_t *PS); static void estimate_memUsage (int_t, int, superlu_dist_mem_usage_t *, float *, float *, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t *); static void symbfact_free (int, int, Llu_symbfact_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *); static int_t denseSep_symbfact (int , int_t, int, int, int, int_t *, int_t *, int, int, int, int_t, int_t, int_t *, int_t *, int_t *, int_t *, int_t *, MPI_Comm, MPI_Comm *, Llu_symbfact_t *, Pslu_freeable_t *_freeable, vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t * ); static int_t dnsUpSeps_symbfact (int_t, int, int, int, int, int_t *, int_t *, int_t, Llu_symbfact_t *, Pslu_freeable_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t *, int_t *, int_t *, int_t *); static void intraLvl_symbfact (SuperMatrix *, int, int, int, int, int, int_t *, int_t *, int, int, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, MPI_Comm, MPI_Comm *); static void initLvl_symbfact (int_t, int, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *, MPI_Comm, int_t *, int_t, int_t); static void createComm (int, int, MPI_Comm *, MPI_Comm *); static void freeComm (int, int, MPI_Comm *, MPI_Comm *); static void domain_symbfact (SuperMatrix *, int, int, int, int, int, int_t *, int_t *, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *); static float allocPrune_domain (int_t, int_t, Llu_symbfact_t *, 
vtcsInfo_symbfact_t *, psymbfact_stat_t *); static float allocPrune_lvl (Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *); static int symbfact_alloc (int_t, int, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, comm_symbfact_t *, psymbfact_stat_t *); static float symbfact_mapVtcs (int, int, int, SuperMatrix *, int_t *, int_t *, Pslu_freeable_t *, vtcsInfo_symbfact_t *, int_t *, int_t, psymbfact_stat_t *); static void symbfact_distributeMatrix (int, int, int, SuperMatrix *, int_t *, int_t *, matrix_symbfact_t *, Pslu_freeable_t *, vtcsInfo_symbfact_t *, int_t *, MPI_Comm *); static int_t interLvl_symbfact (SuperMatrix *, int, int, int, int, int, int, int, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, Llu_symbfact_t *, Pslu_freeable_t*, comm_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *, MPI_Comm, MPI_Comm *); static float cntsVtcs (int_t, int, int, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, int_t *, int_t *, int_t *, psymbfact_stat_t *, MPI_Comm *); /************************************************************************/ float symbfact_dist /************************************************************************/ ( int nprocs_num, /* Input - no of processors */ int nprocs_symb, /* Input - no of processors for the symbolic factorization */ SuperMatrix *A, /* Input - distributed input matrix */ int_t *perm_c, /* Input - column permutation */ int_t *perm_r, /* Input - row permutation */ int_t *sizes, /* Input - sizes of each node in the separator tree */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ Pslu_freeable_t *Pslu_freeable, /* Output - local L and U structure, global to local indexing information */ MPI_Comm *num_comm, /* Input - communicator for numerical factorization */ MPI_Comm *symb_comm, /* Input - communicator for symbolic factorization */ superlu_dist_mem_usage_t *symb_mem_usage ) { /*! \brief * *
 
 * Purpose
 * =======
 *   symbfact_dist() performs symbolic factorization of matrix A suitable
 *   for performing the supernodal Gaussian elimination with no pivoting (GEPP). 
 *   This routine computes the structure of one column of L and one row of U 
 *   at a time.  It uses:
 *        o distributed input matrix
 *        o supernodes
 *        o symmetric structure pruning
 *
 *
 * Arguments
 * =========
 *
 * nprocs_num (input) int
 *         Number of processors SuperLU_DIST is executed on, and the input 
 *         matrix is distributed on.
 *
 * nprocs_symb (input) int
 *         Number of processors on which the symbolic factorization is
 *         performed.  It is equal to the number of independent domains
 *         idenfied in the graph partitioning algorithm executed
 *         previously and has to be a power of 2.  It corresponds to
 *         number of leaves in the separator tree.
 *
 * A       (input) SuperMatrix*
 *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The
 *         number of the linear equations is A->nrow.  Matrix A is
 *         distributed in NRformat_loc format.
 *         Matrix A is not yet permuted by perm_c.
 *
 * perm_c  (input) int_t*
 *	   Column permutation vector of size A->ncol, which defines the 
 *         permutation matrix Pc; perm_c[i] = j means column i of A is 
 *         in position j in A*Pc.
 *
 * perm_r  (input) int_t*
 *	   Row permutation vector of size A->nrow, which defines the 
 *         permutation matrix Pr; perm_r[i] = j means column i of A is 
 *         in position j in Pr*A.
 *
 * sizes   (input) int_t*
 *         Contains the number of vertices in each separator.
 *
 * fstVtxSep (input) int_t*
 *         Contains first vertex for each separator.
 *
 * Pslu_freeable (output) Pslu_freeable_t*
 *         Returns the local L and U structure, and global to local
 *         information on the indexing of the vertices.  Contains all
 *         the information necessary for performing the data
 *         distribution towards the numeric factorization.
 *				    
 * num_comm (input) MPI_Comm*
 *         Communicator for numerical factorization 
 *
 * symb_comm (input) MPI_Comm*
 *         Communicator for symbolic factorization 
 *
 * symb_mem_usage (input) superlu_dist_mem_usage_t *
 *         Statistics on memory usage.
 *
 * Return value
 * ============
 *   < 0, number of bytes allocated on return from the symbolic factorization.
 *   > 0, number of bytes allocated when out of memory.
 *
 * Sketch of the algorithm
 * =======================
 *
 *  Distrbute the vertices on the processors using a subtree to
 *  subcube algorithm.
 *
 *  Redistribute the structure of the input matrix A according to the
 *  subtree to subcube computed previously for the symbolic
 *  factorization routine.  This implies in particular a distribution
 *  from nprocs_num processors to nprocs_symb processors.
 *
 *  Perform symbolic factorization guided by the separator tree provided by
 *  a graph partitioning algorithm.  The symbolic factorization uses a 
 *  combined left-looking, right-looking approach. 
 * 
*/ NRformat_loc *Astore; int iam, szSep, fstP, lstP, npNode, nlvls, lvl, p, iSep, jSep; int iinfo; /* return code */ int_t m, n; int_t nextl, nextu, neltsZr, neltsTotal, nsuper_loc, szLGr, szUGr; int_t ind_blk, nsuper, vtx, min_mn, szsn; long long int nnzL, nnzU, nnzLU; float stat_loc[23], stat_glob[23], mem_glob[15]; Llu_symbfact_t Llu_symbfact; /* local L and U and pruned L and U data structures */ vtcsInfo_symbfact_t VInfo; /* local information on number of blocks, number of vertices in a block etc */ matrix_symbfact_t AS; /* temporary storage for the input matrix after redistribution */ comm_symbfact_t CS; /* information on communication */ /* relaxation parameters (for future release) and statistics collected during the symbolic factorization */ psymbfact_stat_t PS; /* temp array of size n, used as a marker by the subroutines */ int_t *tempArray; int_t i, j, k; int_t fstVtx, lstVtx, mark, fstVtx_lid, vtx_lid, maxNvtcsPProc; int_t nnz_asup_loc, nnz_ainf_loc, fill_rcmd; float totalMemLU, overestimMem; MPI_Comm *commLvls; /* maximum block size */ int_t maxSzBlk; float flinfo; #if ( PRNTlevel >= 1) float stat_msgs_l[10], stat_msgs_g[10]; #endif #if ( PROFlevel>=1 ) double t, t_symbFact[3], t_symbFact_loc[3]; double *time_lvlsT, *time_lvls, t1, t2, time_lvlsg[9]; #endif /* Initialization */ MPI_Comm_rank ((*num_comm), &iam); commLvls = NULL; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter psymbfact()"); #endif initParmsAndStats (&PS); if (nprocs_symb != 1) { if (!(commLvls = (MPI_Comm *) SUPERLU_MALLOC(2*nprocs_symb*sizeof(MPI_Comm)))) { fprintf (stderr, "Malloc fails for commLvls[]."); return (PS.allocMem); } PS.allocMem += 2 * nprocs_symb * sizeof(MPI_Comm); } nlvls = (int) LOG2( nprocs_num ) + 1; #if ( PROFlevel>=1 ) time_lvlsT = (double *) SUPERLU_MALLOC(3*nprocs_symb*(nlvls+1) * sizeof(double)); time_lvls = (double *) SUPERLU_MALLOC(3*(nlvls+1) * sizeof(double)); if (!time_lvls || !time_lvlsT) { fprintf (stderr, "Malloc fails for time_lvls[]."); return 
(PS.allocMem); } PS.allocMem += (3*nprocs_symb*(nlvls+1) + 3*(nlvls+1)) * sizeof(double); #endif VInfo.xlsub_nextLvl = 0; VInfo.xusub_nextLvl = 0; VInfo.maxSzBlk = sp_ienv_dist(3); maxSzBlk = VInfo.maxSzBlk; mark = EMPTY; nsuper_loc = 0; nextl = 0; nextu = 0; neltsZr = 0; neltsTotal = 0; m = A->nrow; n = A->ncol; min_mn = SUPERLU_MIN( m, n ); if (!(tempArray = intMalloc_symbfact(n))) { fprintf (stderr, "Malloc fails for tempArray[].\n"); return (PS.allocMem); } PS.allocMem += n * sizeof(int_t); #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /* Distribute vertices on processors */ if ((flinfo = symbfact_mapVtcs (iam, nprocs_num, nprocs_symb, A, fstVtxSep, sizes, Pslu_freeable, &VInfo, tempArray, maxSzBlk, &PS)) > 0) return (flinfo); maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; /* Redistribute matrix A on processors following the distribution found in symbfact_mapVtcs. Store the redistributed A temporarily into AS */ symbfact_distributeMatrix (iam, nprocs_num, nprocs_symb, A, perm_c, perm_r, &AS, Pslu_freeable, &VInfo, tempArray, num_comm); /* THE REST OF THE SYMBOLIC FACTORIZATION IS EXECUTED ONLY BY NPROCS_SYMB PROCESSORS */ if ( iam < nprocs_symb ) { #if ( PROFlevel>=1 ) t_symbFact_loc[0] = SuperLU_timer_() - t; t = SuperLU_timer_(); t_symbFact_loc[1] = t; #endif /* Allocate storage common to the symbolic factor routines */ if (iinfo = symbfact_alloc (n, nprocs_symb, Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS)) return (PS.allocMem); /* Copy the redistributed input matrix AS at the end of the memory buffer allocated to store L and U. That is, copy (AS.x_ainf, AS.ind_ainf) in (xlsub, lsub), (AS.x_asup, AS.ind_asup) in (xusub, usub). 
Free the memory used to store the input matrix */ nnz_ainf_loc = VInfo.nnz_ainf_loc; nnz_asup_loc = VInfo.nnz_asup_loc; j = Llu_symbfact.szUsub - VInfo.nnz_asup_loc; k = Llu_symbfact.szLsub - VInfo.nnz_ainf_loc; for (i = 0; i <= VInfo.nvtcs_loc; i++) { Llu_symbfact.xusub[i] = AS.x_asup[i] + j; Llu_symbfact.xlsub[i] = AS.x_ainf[i] + k; } for (i = 0; i < VInfo.nnz_asup_loc; i++, j++) Llu_symbfact.usub[j] = AS.ind_asup[i]; for (i = 0; i < VInfo.nnz_ainf_loc; i++, k++) Llu_symbfact.lsub[k] = AS.ind_ainf[i]; SUPERLU_FREE( AS.x_ainf ); SUPERLU_FREE( AS.x_asup ); SUPERLU_FREE( AS.ind_ainf ); SUPERLU_FREE( AS.ind_asup ); if (nprocs_symb != 1) { createComm (iam, nprocs_symb, commLvls, symb_comm); #if ( PROFlevel>=1 ) t_symbFact_loc[2] = SuperLU_timer_(); #endif if ((flinfo = cntsVtcs (n, iam, nprocs_symb, Pslu_freeable, &Llu_symbfact, &VInfo, tempArray, fstVtxSep, sizes, &PS, commLvls)) > 0) return (flinfo); #if ( PROFlevel>=1 ) t_symbFact_loc[2] = SuperLU_timer_() - t_symbFact_loc[2]; #endif } /* set to EMPTY marker[] array */ for (i = 0; i < n; i++) tempArray[i] = EMPTY; szSep = nprocs_symb; iSep = 0; lvl = 0; while (szSep >= 1) { /* for each level in the separator tree */ npNode = nprocs_symb / szSep; fstP = 0; /* for each node in the level */ for (jSep = iSep; jSep < iSep + szSep; jSep++) { fstVtx = fstVtxSep[jSep]; lstVtx = fstVtx + sizes[jSep]; /* if this is the first level */ if (szSep == nprocs_symb) { /* compute symbolic factorization for my domain */ if (fstP == iam) { /* allocate storage for the pruned structures */ #if ( PROFlevel>=1 ) t1 = SuperLU_timer_(); #endif if ((flinfo = allocPrune_domain (fstVtx, lstVtx, &Llu_symbfact, &VInfo, &PS)) > 0) return (flinfo); if (fstVtx < lstVtx) VInfo.fstVtx_nextLvl = VInfo.begEndBlks_loc[2]; domain_symbfact (A, iam, lvl, szSep, iSep, jSep, sizes, fstVtxSep, fstVtx, lstVtx, Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS, tempArray, &mark, &nextl, &nextu, &neltsZr, &neltsTotal, &nsuper_loc); PS.estimLSz = nextl; PS.estimUSz 
= nextu; if (nprocs_symb != 1) if((flinfo = allocPrune_lvl (&Llu_symbfact, &VInfo, &PS)) > 0) return (flinfo); #if ( PROFlevel>=1 ) t2 = SuperLU_timer_(); time_lvls[lvl] = 0.; time_lvls[lvl+1] = 0.; time_lvls[lvl + 2] = t2 - t1; #endif } } else { lstP = fstP + npNode; if (fstP <= iam && iam < lstP) { #if ( PROFlevel>=1 ) t1 = SuperLU_timer_(); #endif if (VInfo.filledSep != FILLED_SEPS) initLvl_symbfact(n, iam, fstVtx, lstVtx, Pslu_freeable, &Llu_symbfact, &VInfo, &PS, commLvls[jSep], tempArray, nextl, nextu); #if ( PROFlevel>=1 ) t2 = SuperLU_timer_(); time_lvls[3*lvl] = t2 - t1; #endif interLvl_symbfact (A, iam, lvl, szSep, fstP, lstP, iSep, jSep, sizes, fstVtxSep, &nextl, &nextu, &nsuper_loc, &mark, tempArray, &Llu_symbfact, Pslu_freeable, &CS, &VInfo, &PS, commLvls[jSep], symb_comm); #if ( PROFlevel>=1 ) t1 = SuperLU_timer_(); time_lvls[3*lvl+1] = t1 - t2; #endif if (VInfo.filledSep != FILLED_SEPS) intraLvl_symbfact (A, iam, lvl, szSep, iSep, jSep, sizes, fstVtxSep, fstP, lstP, fstVtx, lstVtx, Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS, tempArray, &mark, &nextl, &nextu, &neltsZr, &neltsTotal, &nsuper_loc, commLvls[jSep], symb_comm); #if ( PROFlevel>=1 ) t2 = SuperLU_timer_(); time_lvls[3*lvl+2] = t2 - t1; #endif } } fstP += npNode; } iSep += szSep; szSep = szSep / 2; lvl ++; } SUPERLU_FREE( tempArray ); /* Set up global information and collect statistics */ if (PS.maxSzLPr < Llu_symbfact.indLsubPr) PS.maxSzLPr = Llu_symbfact.indLsubPr; if (PS.maxSzUPr < Llu_symbfact.indUsubPr) PS.maxSzUPr = Llu_symbfact.indUsubPr; Llu_symbfact.xlsub[VInfo.nvtcs_loc] = nextl; Llu_symbfact.xusub[VInfo.nvtcs_loc] = nextu; fill_rcmd = SUPERLU_MAX( nextl / (nnz_ainf_loc+1), nextu / (nnz_asup_loc+1)) + 1; Pslu_freeable->xsup_beg_loc = intMalloc_dist (nsuper_loc+1); Pslu_freeable->xsup_end_loc = intMalloc_dist (nsuper_loc+1); if (!Pslu_freeable->xsup_beg_loc || !Pslu_freeable->xsup_end_loc) { fprintf (stderr, "Malloc fails for xsup_beg_loc, xsup_end_loc."); return (PS.allocMem); } 
PS.allocMem += 2 * (nsuper_loc+1) * sizeof(int_t); maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; nnzL = 0; nnzU = 0; i = 0; nsuper = 0; ind_blk = 0; for (ind_blk = 0; ind_blk < VInfo.nblks_loc; ind_blk ++) { fstVtx = VInfo.begEndBlks_loc[2 * ind_blk]; lstVtx = VInfo.begEndBlks_loc[2 * ind_blk + 1]; fstVtx_lid = LOCAL_IND( Pslu_freeable->globToLoc[fstVtx] ); nsuper = Pslu_freeable->supno_loc[fstVtx_lid]; Pslu_freeable->xsup_beg_loc[nsuper] = fstVtx; szsn = 1; if (INT_MAX - nnzL <= Llu_symbfact.xlsub[fstVtx_lid + 1] - Llu_symbfact.xlsub[fstVtx_lid]) printf ("PE[%d] ERR nnzL %lld\n", iam, nnzL); if (INT_MAX - nnzU <= Llu_symbfact.xusub[fstVtx_lid + 1] - Llu_symbfact.xusub[fstVtx_lid]) printf ("PE[%d] ERR nnzU %lld\n", iam, nnzU); j = Llu_symbfact.xlsub[fstVtx_lid + 1] - Llu_symbfact.xlsub[fstVtx_lid]; k = Llu_symbfact.xusub[fstVtx_lid + 1] - Llu_symbfact.xusub[fstVtx_lid]; nnzL += j; nnzU += k; for (vtx = fstVtx + 1, vtx_lid = fstVtx_lid + 1; vtx < lstVtx; vtx++, vtx_lid ++) { if (Pslu_freeable->supno_loc[vtx_lid] != nsuper) { nsuper = Pslu_freeable->supno_loc[vtx_lid]; Pslu_freeable->xsup_end_loc[nsuper-1] = vtx; Pslu_freeable->xsup_beg_loc[nsuper] = vtx; szsn = 1; j = Llu_symbfact.xlsub[vtx_lid + 1] - Llu_symbfact.xlsub[vtx_lid]; k = Llu_symbfact.xusub[vtx_lid + 1] - Llu_symbfact.xusub[vtx_lid]; } else { szsn ++; } nnzL += j - szsn + 1; nnzU += k - szsn + 1; } Pslu_freeable->xsup_end_loc[nsuper] = lstVtx; } Pslu_freeable->supno_loc[VInfo.nvtcs_loc] = nsuper_loc; Pslu_freeable->nvtcs_loc = VInfo.nvtcs_loc; /* set up xsup data */ Pslu_freeable->lsub = Llu_symbfact.lsub; Pslu_freeable->xlsub = Llu_symbfact.xlsub; Pslu_freeable->usub = Llu_symbfact.usub; Pslu_freeable->xusub = Llu_symbfact.xusub; Pslu_freeable->szLsub = Llu_symbfact.szLsub; Pslu_freeable->szUsub = Llu_symbfact.szUsub; #if ( PROFlevel>=1 ) t_symbFact_loc[1] = SuperLU_timer_() - t_symbFact_loc[1]; #endif #if ( PRNTlevel>=1 ) estimate_memUsage (n, iam, symb_mem_usage, &totalMemLU, &overestimMem, 
Pslu_freeable, &Llu_symbfact, &VInfo, &CS, &PS); stat_loc[0] = (float) nnzL; stat_loc[1] = (float) nnzU; stat_loc[2] = (float) nsuper_loc; stat_loc[3] = (float) Pslu_freeable->xlsub[VInfo.nvtcs_loc]; stat_loc[4] = (float) Pslu_freeable->xusub[VInfo.nvtcs_loc]; stat_loc[5] = totalMemLU; stat_loc[6] = overestimMem; stat_loc[7] = totalMemLU - overestimMem; stat_loc[8] = (float) PS.maxSzBuf; stat_loc[9] = (float) PS.nDnsUpSeps; stat_loc[10] = (float) PS.nDnsCurSep; stat_loc[11] = (float) (Llu_symbfact.no_expand + Llu_symbfact.no_expcp + Llu_symbfact.no_expand_pr); stat_loc[12] = (float) Llu_symbfact.no_expand; stat_loc[13] = (float) Llu_symbfact.no_expcp; stat_loc[14] = (float) Llu_symbfact.no_expand_pr; stat_loc[15] = (float) fill_rcmd; stat_loc[16] = PS.nops; stat_loc[17] = PS.fill_pelt[1]; stat_loc[18] = PS.fill_pelt[4]; stat_loc[19] = PS.fill_pelt[0]; stat_loc[20] = PS.fill_pelt[2]; stat_loc[21] = PS.fill_pelt[3]; stat_loc[22] = PS.fill_pelt[5]; MPI_Reduce (stat_loc, stat_glob, 23, MPI_FLOAT, MPI_SUM, 0, (*symb_comm)); MPI_Reduce (&(stat_loc[5]), mem_glob, 14, MPI_FLOAT, MPI_MAX, 0, (*symb_comm)); fill_rcmd = (int_t) mem_glob[10]; PS.fill_pelt[0] = stat_glob[19]; PS.fill_pelt[1] = mem_glob[12]; PS.fill_pelt[2] = stat_glob[20]; PS.fill_pelt[3] = stat_glob[21]; PS.fill_pelt[4] = mem_glob[13]; PS.fill_pelt[5] = stat_glob[22]; if (PS.fill_pelt[2] == 0.) PS.fill_pelt[2] = 1.; if (PS.fill_pelt[5] == 0.) 
PS.fill_pelt[5] = 1.; #if ( PROFlevel>=1 ) MPI_Reduce (t_symbFact_loc, t_symbFact, 3, MPI_DOUBLE, MPI_MAX, 0, (*symb_comm)); MPI_Gather (time_lvls, 3 * nlvls, MPI_DOUBLE, time_lvlsT, 3 * nlvls , MPI_DOUBLE, 0, (*symb_comm)); #endif stat_msgs_l[0] = (float) PS.maxsz_msgSnd; stat_msgs_l[1] = (float) PS.maxsz_msgSnd; if (PS.maxsz_msgSnd < PS.maxsz_msgCol) stat_msgs_l[1] = PS.maxsz_msgCol; stat_msgs_l[2] = PS.no_shmSnd + PS.no_msgsSnd + PS.no_shmRcvd + PS.no_msgsRcvd; stat_msgs_l[3] = stat_msgs_l[2] + PS.no_msgsCol; stat_msgs_l[4] = stat_msgs_l[2]; stat_msgs_l[5] = stat_msgs_l[3]; stat_msgs_l[6] = PS.no_msgsSnd; stat_msgs_l[7] = PS.no_msgsSnd + PS.no_msgsCol; stat_msgs_l[8] = PS.sz_msgsSnd; stat_msgs_l[9] = PS.sz_msgsSnd + PS.sz_msgsCol; MPI_Reduce (stat_msgs_l, stat_msgs_g, 4, MPI_FLOAT, MPI_MAX, 0, (*symb_comm)); MPI_Reduce (&(stat_msgs_l[4]), &(stat_msgs_g[4]), 6, MPI_FLOAT, MPI_SUM, 0, (*symb_comm)); if (stat_msgs_g[6] == 0) stat_msgs_g[6] = 1; if (stat_msgs_g[7] == 0) stat_msgs_g[7] = 1; if (!iam) { nnzL = (long long) stat_glob[0]; nnzU = (long long) stat_glob[1]; nsuper = (int_t) stat_glob[2]; szLGr = (int_t) stat_glob[3]; szUGr = (int_t) stat_glob[4]; printf("\tMax szBlk %ld\n", (long long) VInfo.maxSzBlk); #if ( PRNTlevel>=2 ) printf("\t relax_gen %.2f, relax_curSep %.2f, relax_seps %.2f\n", PS.relax_gen, PS.relax_curSep, PS.relax_seps); #endif printf("\tParameters: fill mem %ld fill pelt %ld\n", (long long) sp_ienv_dist(6), (long long) PS.fill_par); printf("\tNonzeros in L %ld\n", nnzL); printf("\tNonzeros in U %ld\n", nnzU); nnzLU = nnzL + nnzU; printf("\tnonzeros in L+U-I %ld\n", nnzLU); printf("\tNo of supers %ld\n", (long long) nsuper); printf("\tSize of G(L) %ld\n", (long long) szLGr); printf("\tSize of G(U) %ld\n", (long long) szUGr); printf("\tSize of G(L+U) %ld\n", (long long) szLGr+szUGr); printf("\tParSYMBfact (MB) :\tL\\U MAX %.2f\tAVG %.2f\n", mem_glob[0]*1e-6, stat_glob[5]/nprocs_symb*1e-6); #if ( PRNTlevel>=2 ) printf("\tRL overestim (MB):\tL\\U 
MAX %.2f\tAVG %.2f\n", mem_glob[1]*1e-6, stat_glob[6]/nprocs_symb*1e-6); printf("\tsnd/rcv buffers (MB):\tL\\U MAX %.2f\tAVG %.2f\n", mem_glob[3]*1e-6, stat_glob[8]/nprocs_symb*1e-6); printf("\tSYMBfact 2*n+4*nvtcs_loc+2*maxNvtcsNds_loc:\tL\\U %.2f\n", (float) (2 * n * sizeof(int_t)) *1e-6); printf("\tint_t %d, int %d, long int %d, short %d, float %d, double %d\n", sizeof(int_t), sizeof(int), sizeof(long int), sizeof(short), sizeof(float), sizeof(double)); printf("\tDNS ALLSEPS:\t MAX %d\tAVG %.2f\n", (int_t) mem_glob[4], stat_glob[9]/nprocs_symb); printf("\tDNS CURSEP:\t MAX %d\tAVG %.2f\n\n", (int_t) mem_glob[5], stat_glob[10]/nprocs_symb); printf("\t MAX FILL Mem(L+U) / Mem(A) per processor %ld\n", fill_rcmd); printf("\t Per elt MAX %ld AVG %ld\n", (int_t) PS.fill_pelt[4], (int_t)(PS.fill_pelt[3]/PS.fill_pelt[5])); printf("\t Per elt RL MAX %ld AVG %ld\n", (int_t) PS.fill_pelt[1], (int_t)(PS.fill_pelt[0]/PS.fill_pelt[2])); printf("\tM Nops:\t MAX %.2f\tAVG %.2f\n", mem_glob[11]*1e-6, (stat_glob[16]/nprocs_symb)*1e-6); printf("\tEXPANSIONS: MAX/AVG\n"); printf("\tTOTAL: %d / %.2f\n", (int_t) mem_glob[6], stat_glob[11]/nprocs_symb); printf("\tREALLOC: %.f / %.2f RL_CP %.f / %.2f PR_CP %.f / %.2f\n", mem_glob[7], stat_glob[12]/nprocs_symb, mem_glob[8], stat_glob[13]/nprocs_symb, mem_glob[9], stat_glob[14]/nprocs_symb); printf ("\n\tDATA MSGS noMsgs*10^3 %.3f/%.3f size (MB) %.3f/%.3f \n", stat_msgs_g[2]*1e-3, stat_msgs_g[4]/nprocs_symb*1e-3, stat_msgs_g[0]*1e-6, stat_msgs_g[8] / stat_msgs_g[6]*1e-6); printf ("\tTOTAL MSGS noMsgs*10^3 %.3f/%.3f size (MB) %.3f/%.3f \n", stat_msgs_g[3]*1e-3, stat_msgs_g[5]/nprocs_symb*1e-3, stat_msgs_g[1]*1e-6, stat_msgs_g[9]/stat_msgs_g[7]*1e-6); #endif #if ( PROFlevel>=1 ) printf("Distribute matrix time = %8.3f\n", t_symbFact[0]); printf("Count vertices time = %8.3f\n", t_symbFact[2]); printf("Symbfact DIST time = %8.3f\n", t_symbFact[1]); printf("\nLvl\t Time\t Init\t Inter\t Intra\n"); time_lvlsg[0] = 0.; for (i = 0; i < nlvls; 
i++) { for (j = 1; j < 9; j++) time_lvlsg[j] = 0.; for (p = 0; p < nprocs_symb; p++) { k = p * 3 * nlvls; t = time_lvlsT[i*3+k] + time_lvlsT[i*3+k+1] + time_lvlsT[i*3+k+2]; if (t > time_lvlsg[1]) { time_lvlsg[1] = t; j = p; } time_lvlsg[2] += t; if (time_lvlsT[i*3+k] > time_lvlsg[3]) time_lvlsg[3] = time_lvlsT[i*3+k]; time_lvlsg[4] += time_lvlsT[i*3+k]; if (time_lvlsT[i*3+k+1] > time_lvlsg[5]) time_lvlsg[5] = time_lvlsT[i*3+k+1]; time_lvlsg[6] += time_lvlsT[i*3+k+1]; if (time_lvlsT[i*3+k+2] > time_lvlsg[7]) time_lvlsg[7] = time_lvlsT[i*3+k+2]; time_lvlsg[8] += time_lvlsT[i*3+k+2]; } time_lvlsg[0] += time_lvlsg[1]; printf ("%d \t%.3f/%.3f\t%.3f/%.3f\t%.3f/%.3f\t%.3f/%.3f\n", i, time_lvlsg[1], time_lvlsg[2] / nprocs_symb, time_lvlsg[3], time_lvlsg[4] / nprocs_symb, time_lvlsg[5], time_lvlsg[6] /nprocs_symb, time_lvlsg[7], time_lvlsg[8] / nprocs_symb); } printf("\t %8.3f \n", time_lvlsg[0]); #endif } #endif #if ( PROFlevel>=1 ) SUPERLU_FREE (time_lvls); SUPERLU_FREE (time_lvlsT); #endif symbfact_free (iam, nprocs_symb, &Llu_symbfact, &VInfo, &CS); } /* if (iam < nprocs_symb) */ else { /* update Pslu_freeable before returning */ Pslu_freeable->nvtcs_loc = 0; Pslu_freeable->xlsub = NULL; Pslu_freeable->lsub = NULL; Pslu_freeable->xusub = NULL; Pslu_freeable->usub = NULL; Pslu_freeable->supno_loc = NULL; Pslu_freeable->xsup_beg_loc = NULL; Pslu_freeable->xsup_end_loc = NULL; SUPERLU_FREE( tempArray ); PS.allocMem -= n * sizeof(int_t); } if (iam < nprocs_symb && nprocs_symb != 1) freeComm (iam, nprocs_symb, commLvls, symb_comm); if (commLvls != NULL) SUPERLU_FREE( commLvls ); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit psymbfact()"); #endif return (- PS.allocMem); } /* SYMBFACT_DIST */ static int_t initParmsAndStats ( psymbfact_stat_t *PS /* Output -statistics*/ ) /*! \brief *
 
 * Purpose
 * =======
 * Initialize relaxation parameters and statistics variables
 * 
*/ { int i; PS->nDnsCurSep = 0; PS->nDnsUpSeps = 0; PS->relax_gen = 1.0; PS->relax_curSep = 1.0; PS->relax_seps = 1.0; PS->fill_par = sp_ienv_dist(6); PS->nops = 0.; PS->no_shmSnd = 0.; PS->no_msgsSnd = 0.; PS->maxsz_msgSnd = 0; PS->sz_msgsSnd = 0.; PS->no_shmRcvd = 0.; PS->no_msgsRcvd = 0.; PS->maxsz_msgRcvd = 0; PS->sz_msgsRcvd = 0.; PS->no_msgsCol = 0.; PS->maxsz_msgCol = 0; PS->sz_msgsCol = 0.; for (i = 0; i < 6; i++) PS->fill_pelt[i] = 0.; PS->estimUSz = 0; PS->estimLSz = 0; PS->maxSzLPr = 0; PS->maxSzUPr = 0; PS->maxSzBuf = 0; PS->szDnsSep = 0; PS->allocMem = 0; return 0; } static float cntsVtcs ( int_t n, /* Input - order of the input matrix */ int iam, /* Input - my processor number */ int nprocs_symb, /* Input - no of processors for symbolic factorization */ Pslu_freeable_t *Pslu_freeable, /* Input -globToLoc and maxNvtcsPProc */ Llu_symbfact_t *Llu_symbfact, /* Input/Output -local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input - local info on vertices distribution */ int_t *tempArray, /* Input - temporary storage */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ int_t *sizes, /* Input - sizes of each node in the tree */ psymbfact_stat_t *PS, /* Input/Output -statistics */ MPI_Comm *commLvls ) /*! \brief * *
 * Purpose
 * =======
 * 
 * Computes an estimation of the number of elements in columns of L
 * and rows of U.  Stores this information in cntelt_vtcs, and it will
 * be used in the right-looking symbolic factorization.
 * 
*/
{
  /* Walks the separator tree level by level (szSep nodes per level, halved
   * after each level).  For the nodes this process takes part in, it fills
   * cntelt_vtcs[] for the locally owned vertices of the non-leaf levels.
   * Returns SUCCES_RET, or PS->allocMem when the scratch array cannot be
   * allocated. */
  int fstP, lstP, szSep, npNode, i, j;
  int_t nvtcs_loc, ind_blk, vtx, vtx_lid, ii, jj, lv, vtx_elt, cur_blk;
  int_t fstVtx, lstVtx, fstVtx_blk, lstVtx_blk;
  int_t nelts, nelts_new_blk;
  int_t *xlsub, *lsub, *xusub, *usub, *globToLoc, maxNvtcsPProc;
  int_t *minElt_vtx, *cntelt_vtcs;

  /* Initialization */
  xlsub = Llu_symbfact->xlsub; lsub = Llu_symbfact->lsub;
  xusub = Llu_symbfact->xusub; usub = Llu_symbfact->usub;
  cntelt_vtcs = Llu_symbfact->cntelt_vtcs;
  globToLoc = Pslu_freeable->globToLoc;
  nvtcs_loc = VInfo->nvtcs_loc;
  maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc;

  /* minElt_vtx receives the result of the MPI_MIN reduction below.  It
   * borrows lsub when szLsub - nnz_ainf_loc exceeds n; otherwise a scratch
   * array of n entries is allocated and charged to PS->allocMem. */
  /* NOTE(review): in the borrowing case minElt_vtx points at the start of
   * lsub, which is still read in the traversal loops below - confirm that
   * the reduction output cannot overwrite live lsub entries. */
  if (Llu_symbfact->szLsub - VInfo->nnz_ainf_loc > n)
    minElt_vtx = lsub;
  else {
    /* allocate memory for minElt_vtx */
    if (!(minElt_vtx = intMalloc_dist(n))) {
      fprintf(stderr, "Malloc fails for minElt_vtx[].");
      return (PS->allocMem);
    }
    PS->allocMem += n * sizeof (int_t);
  }

  /* n acts as the "not seen yet" marker in tempArray[] */
  for (ii = 0; ii < n; ii++)
    tempArray[ii] = n;
  for (ii = 0; ii < nvtcs_loc; ii++)
    cntelt_vtcs[ii] = 0;

  /* i        : index in fstVtxSep[]/sizes[] of the first node of the level
   * cur_blk  : index in begEndBlks_loc[] of the next unprocessed local block
   * vtx_lid  : local index of the next unprocessed local vertex */
  szSep = nprocs_symb;
  i = 0;
  cur_blk = 0;
  vtx_lid = 0;
  while (szSep >= 1) {
    /* for each level in the separator tree */
    npNode = nprocs_symb / szSep;
    fstP = 0;
    /* for each node in the level */
    for (j = i; j < i + szSep; j++) {
      fstVtx = fstVtxSep[j];
      lstVtx = fstVtx + sizes[j];
      lstP = fstP + npNode;

      /* only the processes in [fstP, lstP) work on node j */
      if (fstP <= iam && iam < lstP) {
	ind_blk = cur_blk;
	ii = vtx_lid;
	/* For every local vertex of this node, record in tempArray[e] the
	 * first visited vertex whose lsub/usub list contains e. */
	while (VInfo->begEndBlks_loc[ind_blk] < lstVtx &&
	       ind_blk < 2 * VInfo->nblks_loc) {
	  fstVtx_blk = VInfo->begEndBlks_loc[ind_blk];
	  lstVtx_blk = VInfo->begEndBlks_loc[ind_blk + 1];
	  ind_blk += 2;
	  for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, ii++) {
	    for (jj = xlsub[ii]; jj < xlsub[ii+1]; jj++) {
	      vtx_elt = lsub[jj];
	      if (tempArray[vtx_elt] == n) {
		tempArray[vtx_elt] = vtx;
	      }
	    }
	    for (jj = xusub[ii]; jj < xusub[ii+1]; jj++) {
	      vtx_elt = usub[jj];
	      if (tempArray[vtx_elt] == n) {
		tempArray[vtx_elt] = vtx;
	      }
	    }
	  }
	}

	/* Leaf level: only advance the local vertex cursor, no counting. */
	if (szSep == nprocs_symb)
	  vtx_lid = ii;
	else {
	  /* Element-wise minimum of tempArray[fstVtx..n) over the
	   * communicator commLvls[j]. */
	  MPI_Allreduce (&(tempArray[fstVtx]), &(minElt_vtx[fstVtx]),
			 (int) (n - fstVtx), mpi_int_t, MPI_MIN, commLvls[j]);
#if ( PRNTlevel>=1 )
	  PS->no_msgsCol += (float) (2 * (int) LOG2( npNode ));
	  PS->sz_msgsCol += (float) (n - fstVtx);
	  if (PS->maxsz_msgCol < n - fstVtx)
	    PS->maxsz_msgCol = n - fstVtx;
#endif

	  /* nelts counts entries whose minimum lies before fstVtx;
	   * tempArray[v], for v inside the node, is reused as the number of
	   * entries whose minimum is v. */
	  nelts = 0;
	  for (ii = fstVtx; ii < lstVtx; ii++)
	    tempArray[ii] = 0;
	  for (ii = fstVtx; ii < n; ii++) {
	    if (minElt_vtx[ii] != n) {
	      if (minElt_vtx[ii] < fstVtx)
		nelts ++;
	      else
		tempArray[minElt_vtx[ii]] ++;
	      /* NOTE(review): strict '>' leaves index lstVtx itself out of
	       * this copy-back - confirm whether '>=' was intended. */
	      if (ii > lstVtx)
		tempArray[ii] = minElt_vtx[ii];
	    }
	  }

	  /* Running sum over the node: each local vertex gets the count
	   * accumulated before its own block started. */
	  ind_blk = cur_blk;
	  lv = fstVtx;
	  while (VInfo->begEndBlks_loc[ind_blk] < lstVtx &&
		 ind_blk < 2 * VInfo->nblks_loc) {
	    fstVtx_blk = VInfo->begEndBlks_loc[ind_blk];
	    lstVtx_blk = VInfo->begEndBlks_loc[ind_blk + 1];
	    ind_blk += 2;

	    /* vertices between the previous local block and this one */
	    for (ii = lv; ii < fstVtx_blk; ii++)
	      nelts += tempArray[ii];
	    lv = lstVtx_blk;

	    nelts_new_blk = 0;
	    for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, vtx_lid++) {
	      nelts_new_blk += tempArray[vtx];
	      cntelt_vtcs[vtx_lid] = nelts;
	    }
	    nelts += nelts_new_blk;
	  }
	} /* if (szSep != nprocs_symb) */
	cur_blk = ind_blk;
      }
      fstP += npNode;
    }
    i += szSep;
    szSep = szSep / 2;
  }

  /* free memory */
  if (minElt_vtx != lsub) {
    SUPERLU_FREE (minElt_vtx);
    PS->allocMem -= n * sizeof(int_t);
  }
  return (SUCCES_RET);
}

static float
symbfact_mapVtcs
(
 int iam,           /* Input -process number */
 int nprocs_num,    /* Input -number of processors */
 int nprocs_symb,   /* Input -number of procs for symbolic factorization */
 SuperMatrix *A,    /* Input -input distributed matrix A */
 int_t *fstVtxSep,  /* Input -first vertex in each separator */
 int_t *sizes,      /* Input -size of each separator in the separator tree */
 Pslu_freeable_t *Pslu_freeable, /* Output -globToLoc and maxNvtcsPProc computed */
 vtcsInfo_symbfact_t *VInfo, /* Output -local info on vertices distribution */
 int_t *tempArray,  /* Input -temp array of size n = order of the matrix */
 int_t maxSzBlk,    /* Input -maximum number of vertices in a block */
 psymbfact_stat_t *PS /* Input/Output -statistics */
 )
{
/*! \brief
 *
 * 
 * Purpose
 * =======
 *
 *  symbfact_mapVtcs maps the vertices of the graph of the input
 *  matrix A on nprocs_symb processors, using the separator tree
 *  returned by a graph partitioning algorithm from the previous step
 *  of the symbolic factorization.  The number of processors
 *  nprocs_symb must be a power of 2.
 *
 * Description of the algorithm
 * ============================
 *
 *  A subtree to subcube algorithm is used first to map the processors
 *  on the nodes of the separator tree.
 *
 *  For each node of the separator tree, its corresponding vertices
 *  are distributed on the processors assigned to this node, using a
 *  block cyclic distribution.
 *
 *  After the distribution, fields of the VInfo structure are
 *  computed.  The array globToLoc and maxNvtcsPProc of Pslu_freeable
 *  are also computed.
 * 
*/
  /* Returns SUCCES_RET, or PS->allocMem when one of the three work arrays
   * cannot be allocated. */
  int szSep, npNode, firstP, p, iSep, jSep, ind_ap_s, ind_ap_d;
  int_t k, n, kk;
  int_t fstVtx, lstVtx;
  int_t fstVtxBlk, ind_blk;
  int_t noVtcsProc, noBlk;
  int_t nvtcs_loc; /* number of vertices owned by process iam */
  int_t nblks_loc; /* no of blocks owned by process iam */
  int_t *globToLoc; /* global indexing to local indexing */
  int_t maxNvtcsPProc, maxNvtcsNds_loc, nvtcsNds_loc, maxNeltsVtx;
  int_t *begEndBlks_loc; /* begin and end vertex of each local block */
  int_t *vtcs_pe; /* contains the number of vertices on each processor */
  int *avail_pes; /* contains the processors to be used at each level */

  n = A->ncol;
  /* allocate memory */
  /* NOTE(review): the early returns below do not release arrays allocated
   * by the preceding steps - confirm whether the caller aborts on failure. */
  if (!(globToLoc = intMalloc_dist(n + 1))) {
    fprintf (stderr, "Malloc fails for globToLoc[].");
    return (PS->allocMem);
  }
  PS->allocMem += (n+1) * sizeof(int_t);
  if (!(avail_pes = (int *) SUPERLU_MALLOC(nprocs_symb*sizeof(int)))) {
    fprintf (stderr, "Malloc fails for avail_pes[].");
    return (PS->allocMem);
  }
  PS->allocMem += nprocs_symb*sizeof(int);
  if (!(vtcs_pe = (int_t *) SUPERLU_MALLOC(nprocs_symb*sizeof(int_t)))) {
    fprintf (stderr, "Malloc fails for vtcs_pe[].");
    return (PS->allocMem);
  }
  PS->allocMem += nprocs_symb*sizeof(int_t);

  /* Initialization */
  /* globToLoc[n] = n is a sentinel entry past the last vertex */
  globToLoc[n] = n;
  for (p = 0; p < nprocs_symb; p++) {
    vtcs_pe[p] = 0;
    avail_pes[p] = EMPTY;
  }
  nvtcs_loc = 0;
  nblks_loc = 0;
  maxNvtcsNds_loc = 0;
  maxNeltsVtx     = 0;

  /* distribute data among processors */
  /* First pass: globToLoc[k] temporarily holds only the owner of vertex k;
   * it is rewritten as owner * maxNvtcsPProc + local index further down. */
  szSep = nprocs_symb;
  iSep = 0;
  while (szSep >= 1) {
    /* for each level in the separator tree */
    npNode = nprocs_symb / szSep;
    firstP = 0;
    nvtcsNds_loc = 0;
    for (jSep = iSep; jSep < iSep + szSep; jSep++) {
      /* for each node in the level */
      fstVtx = fstVtxSep[jSep];
      lstVtx = fstVtx + sizes[jSep];
      /* accumulate the sizes of all nodes this process takes part in */
      if (firstP <= iam && iam < firstP + npNode)
	maxNeltsVtx += lstVtx - fstVtx;

      if (szSep == nprocs_symb) {
	/* leaves of the separator tree */
	/* the whole leaf goes to the single process firstP */
	for (k = fstVtx; k < lstVtx; k++) {
	  globToLoc[k] = (int_t) firstP;
	  vtcs_pe[firstP] ++;
	}
	if (firstP == iam) {
	  nvtcs_loc += lstVtx - fstVtx;
	  if (fstVtx != lstVtx)
	    nblks_loc ++;
	}
      }
      else {
	/* superior levels of the separator tree */
	k = fstVtx;
	noVtcsProc = maxSzBlk;
	fstVtxBlk = fstVtx;
	/* ind_ap_d is (re)positioned only on even-numbered nodes of the
	 * level; the following odd node keeps appending after it */
	if ((jSep - iSep) % 2 == 0) ind_ap_d = (jSep - iSep) * npNode;

	/* first allocate processors from previous levels */
	for (ind_ap_s = (jSep-iSep) * npNode;
	     ind_ap_s < (jSep-iSep+1) * npNode; ind_ap_s ++) {
	  p = avail_pes[ind_ap_s];
	  if (p != EMPTY && k < lstVtx) {
	    /* for each column in the separator */
	    avail_pes[ind_ap_s] = EMPTY;
	    kk = 0;
	    /* give process p one block of at most noVtcsProc vertices */
	    while (kk < noVtcsProc && k < lstVtx) {
	      globToLoc[k] = p;
	      vtcs_pe[p] ++;
	      k ++;
	      kk ++;
	    }
	    if (p == iam) {
	      nvtcs_loc += kk;
	      nblks_loc ++;
	      nvtcsNds_loc += kk;
	    }
	  }
	  else {
	    /* node exhausted: carry the still-available process over to
	     * the destination slot list */
	    if (p != EMPTY && k == lstVtx) {
	      avail_pes[ind_ap_s] = EMPTY;
	      avail_pes[ind_ap_d] = p; ind_ap_d ++;
	    }
	  }
	}

	/* remaining vertices: blocks dealt cyclically over the npNode
	 * processes starting at firstP */
	noBlk = 0;
	p = firstP + npNode;
	while (k < lstVtx) {
	  /* for each column in the separator */
	  kk = 0;
	  p = (int) (noBlk % (int_t) npNode) + firstP;
	  while (kk < noVtcsProc && k < lstVtx) {
	    globToLoc[k] = p;
	    vtcs_pe[p] ++;
	    k ++;
	    kk ++;
	  }
	  if (p == iam) {
	    nvtcs_loc += kk;
	    nblks_loc ++;
	    nvtcsNds_loc += kk;
	  }
	  noBlk ++;
	} /* while (k < lstVtx) */

	/* Add the unused processors to the avail_pes list of pes */
	for (p = p + 1; p < firstP + npNode; p ++) {
	  avail_pes[ind_ap_d] = p; ind_ap_d ++;
	}
      }
      firstP += npNode;
    }
    /* track the largest per-level local vertex count above the leaves */
    if (maxNvtcsNds_loc < nvtcsNds_loc && szSep != nprocs_symb)
      maxNvtcsNds_loc = nvtcsNds_loc;
    iSep += szSep;
    szSep = szSep / 2;
  }

#if ( PRNTlevel>=2 )
  if (!iam)
    PrintInt10 (" novtcs_pe", nprocs_symb, vtcs_pe);
#endif
  /* determine maximum number of vertices among processors */
  /* vtcs_pe[] is reset to zero on the way; it is reused below as the
   * running local index per process */
  maxNvtcsPProc = vtcs_pe[0];
  vtcs_pe[0] = 0;
  for (p = 1; p < nprocs_symb; p++) {
    if (maxNvtcsPProc < vtcs_pe[p])
      maxNvtcsPProc = vtcs_pe[p];
    vtcs_pe[p] = 0;
  }
#if ( PRNTlevel>=2 )
  if (!iam)
    printf (" MaxNvtcsPerProc %d MaxNvtcs/Avg %e\n\n",
	    maxNvtcsPProc, ((float) maxNvtcsPProc * nprocs_symb)/(float)n);
#endif

  /* 2 entries (begin, end) per local block plus one trailing entry */
  if (iam < nprocs_symb)
    if (!(begEndBlks_loc = intMalloc_symbfact(2 * nblks_loc + 1)))
      ABORT("Malloc fails for begEndBlks_loc[].");

  /* Second pass: scan runs of consecutive vertices with the same owner,
   * encode globToLoc[k] = owner * maxNvtcsPProc + local index, and record
   * the [begin, end) bounds of the runs owned by iam. */
  ind_blk = 0;
  k = 0;
  while (k < n) {
    p = globToLoc[k];
    if (p == iam)
      begEndBlks_loc[ind_blk] = k;
    while (globToLoc[k] == p && k < n) {
      globToLoc[k] = globToLoc[k] * maxNvtcsPProc + vtcs_pe[p];
      vtcs_pe[p] ++;
      k ++;
    }
    if (p == iam) {
      begEndBlks_loc[ind_blk + 1] = k;
      ind_blk += 2;
    }
  }
  /* trailing entry set to n */
  if (iam < nprocs_symb)
    begEndBlks_loc[2 * nblks_loc] = n;

  SUPERLU_FREE (avail_pes);
  SUPERLU_FREE (vtcs_pe);

  /* publish the results */
  Pslu_freeable->maxNvtcsPProc   = maxNvtcsPProc;
  Pslu_freeable->globToLoc       = globToLoc;
  if (iam < nprocs_symb) {
    VInfo->maxNvtcsNds_loc = maxNvtcsNds_loc;
    VInfo->nblks_loc       = nblks_loc;
    VInfo->nvtcs_loc       = nvtcs_loc;
    VInfo->curblk_loc      = 0;
    VInfo->maxNeltsVtx     = maxNeltsVtx;
    VInfo->filledSep       = FALSE;
    VInfo->xlsub_nextLvl   = 0;
    VInfo->xusub_nextLvl   = 0;
    VInfo->begEndBlks_loc  = begEndBlks_loc;
    VInfo->fstVtx_nextLvl  = begEndBlks_loc[0];
  }
  return SUCCES_RET;
}

static void
symbfact_distributeMatrix
(
 int iam,               /* Input - my processor number */
 int nprocs_num,        /* Input - number of processors */
 int nprocs_symb,       /* Input - number of processors for the symbolic factorization */
 SuperMatrix *A,        /* Input - input matrix A */
 int_t *perm_c,         /* Input - column permutation */
 int_t *perm_r,         /* Input - row permutation */
 matrix_symbfact_t *AS, /* Output - temporary storage for the redistributed matrix */
 Pslu_freeable_t *Pslu_freeable, /* Input - global to local information */
 vtcsInfo_symbfact_t *VInfo, /* Input - local info on vertices distribution */
 int_t *tempArray,      /* Input/Output - temporary array of size n (order of the matrix) */
 MPI_Comm *num_comm     /* Input - communicator for nprocs_num procs */
 )
{
/*! \brief
 *
 * 
 * Purpose 
 * =======
 *
 * Distribute input matrix A for the symbolic factorization routine.
 * Only structural information is distributed.  The redistributed
 * matrix has its rows and columns permuted according to perm_r and
 * perm_c. A is not modified during this routine.
 * 
*/ /* Notations: * Ainf : inferior part of A, including diagonal. * Asup : superior part of A. */ int p, p_irow, code_err, ainf_data; int_t n, m_loc, fst_row; int_t i, j, k, irow, jcol; NRformat_loc *Astore; int_t nnz_loc, nnz_iam; /* number of local nonzeros */ int_t nnz_remote; /* number of remote nonzeros to be sent */ int_t SendCnt; /* number of remote nonzeros to be sent */ int_t RecvCnt; /* number of remote nonzeros to be received */ /* number of nonzeros to send/receive per processor */ int_t *nnzToSend, *nnzToRecv; int_t *nnzAinf_toSnd; /* nnz in Ainf to send */ /* VInfo data structures */ int_t *globToLoc, *begEndBlks_loc, nblks_loc, nvtcs_loc, maxNvtcsPProc; int_t neltsRow, vtx, vtx_lid, nelts, ind; int_t *snd_aind, *rcv_aind; int_t *ptr_toSnd, *buf, *ptr_toRcv; /* matrix_symbfact_t *As data */ int_t *x_ainf, *x_asup, *ind_ainf, *ind_asup; int *intBuf1, *intBuf2, *intBuf3, *intBuf4; /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ Astore = (NRformat_loc *) A->Store; n = A->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; globToLoc = Pslu_freeable->globToLoc; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; nnzToRecv = intCalloc_symbfact(3 * (int_t)nprocs_num); nnzToSend = nnzToRecv + nprocs_num; nnzAinf_toSnd = nnzToRecv + 2 * nprocs_num; /* --------------------------------------------------------------------- COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS, THEN ALLOCATE SPACE. THIS ACCOUNTS FOR THE FIRST PASS OF A. 
----------------------------------------------------------------------*/ /* tempArray stores the number of nonzeros in each column of ainf */ for (i = 0; i < n; i++) tempArray[i] = 0; for (i = 0; i < m_loc; i++) { irow = perm_c[perm_r[i+fst_row]]; /* Row number in Pc*Pr*A */ p_irow = OWNER(globToLoc[irow]); neltsRow = 0; for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; j++) { jcol = perm_c[Astore->colind[j]]; if (jcol <= irow) { p = OWNER(globToLoc[jcol]); if (tempArray[jcol] == 0) { nnzToSend[p] += 2; nnzAinf_toSnd[p] += 2; } tempArray[jcol] ++; nnzAinf_toSnd[p] ++; } else { p = p_irow; neltsRow ++; } nnzToSend[p] ++; } if (neltsRow != 0) { nnzToSend[p_irow] += 2; } } /* add one entry which will separate columns of Ainf from rows of Asup */ for (p = 0; p < nprocs_num; p++) if (nnzToSend[p] != 0) nnzToSend[p] ++; /* All-to-all communication */ MPI_Alltoall (nnzToSend, 1, mpi_int_t, nnzToRecv, 1, mpi_int_t, (*num_comm)); nnz_loc = SendCnt = RecvCnt = 0; for (p = 0; p < nprocs_num; p++) { if ( p != iam ) { SendCnt += nnzToSend[p]; RecvCnt += nnzToRecv[p]; } else { nnz_loc += nnzToRecv[p]; nnzToSend[p] = 0; } } nnz_iam = nnz_loc + RecvCnt; /* Total nonzeros ended up in my process. */ /* Allocate temporary storage for sending/receiving the A triplets. 
*/ if (!(snd_aind = intMalloc_symbfact(SendCnt)) && SendCnt != 0) ABORT("Malloc fails for snd_aind[]."); if ( !(rcv_aind = intMalloc_symbfact(nnz_iam + 1))) ABORT("Malloc fails for rcv_aind[]."); if ( !(ptr_toSnd = intCalloc_symbfact((int_t) nprocs_num)) ) ABORT("Malloc fails for ptr_toSnd[]."); if ( !(ptr_toRcv = intCalloc_symbfact((int_t) nprocs_num)) ) ABORT("Malloc fails for ptr_toRcv[]."); /* setup ptr_toSnd[p] to point to data in snd_aind to be send to processor p */ /* VS 2017 crashes without this rearrangement -- fix by John Cary 1/20/18 */ /* for (i = 0, j = 0, p = 0; p < nprocs_num; p++) { */ i = 0; j = 0; p = 0; while (1) { if ( p != iam ) ptr_toSnd[p] = i; else ptr_toSnd[p] = j; i += nnzToSend[p]; j += nnzToRecv[p]; p ++; if (p >= nprocs_num) break; } for (i = 0; i < n; i++) { if (tempArray[i] != 0) { /* column i of Ainf will be send to a processor */ p = OWNER( globToLoc[i] ); if (p == iam) { buf = &(rcv_aind[ptr_toSnd[p]]); } else { buf = &(snd_aind[ptr_toSnd[p]]); } buf[0] = tempArray[i]; buf[1] = i; tempArray[i] = ptr_toSnd[p] + 2; ptr_toSnd[p] += 2 + buf[0]; } } /* set ptr_toSnd to point to Asup data (stored by rows) */ for (i = 0, j = 0, p = 0; p < nprocs_num; p++) { if ( p != iam ) { if (nnzToSend[p] != 0) { snd_aind[i + nnzAinf_toSnd[p]] = EMPTY; ptr_toSnd[p] = i + nnzAinf_toSnd[p] + 1; } } else { if (nnzToRecv[p] != 0) { rcv_aind[j + nnzAinf_toSnd[p]] = EMPTY; ptr_toSnd[p] = j + nnzAinf_toSnd[p] + 1; } } i += nnzToSend[p]; j += nnzToRecv[p]; } /* ------------------------------------------------------------ LOAD THE ENTRIES OF A INTO THE snd_aind STRUCTURE TO SEND. THIS ACCOUNTS FOR THE SECOND PASS OF A. For each processor, we store first the columns to be sent, and then the rows to be sent. For each row/column sent: entry 0 : x = number of elements in that row/column entry 1 : row/column number entries 2 .. x + 2 : row/column indices. 
------------------------------------------------------------*/ for (i = 0; i < m_loc; i++) { irow = perm_c[perm_r[i+fst_row]]; /* Row number in Pc*A */ p_irow = OWNER( globToLoc[irow] ); ptr_toSnd[p_irow] +=2; neltsRow = 0; for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; j++) { jcol = perm_c[Astore->colind[j]]; if (jcol <= irow) { p = OWNER( globToLoc[jcol] ); k = tempArray[jcol]; tempArray[jcol] ++; if (p == iam) { /* local */ rcv_aind[k] = irow; } else { snd_aind[k] = irow; } } else { p = p_irow; neltsRow ++; k = ptr_toSnd[p]; ptr_toSnd[p] ++; if (p == iam) { /* local */ rcv_aind[k] = jcol; } else { snd_aind[k] = jcol; } } } if (neltsRow == 0) ptr_toSnd[p_irow] -= 2; else { /* store entry 0 and entry 1 */ if (p_irow == iam) { /* local */ rcv_aind[ptr_toSnd[p_irow] - neltsRow - 2] = neltsRow; rcv_aind[ptr_toSnd[p_irow] - neltsRow - 1] = irow; } else { /* remote */ snd_aind[ptr_toSnd[p_irow] - neltsRow - 2] = neltsRow; snd_aind[ptr_toSnd[p_irow] - neltsRow - 1] = irow; } } } /* reset ptr_toSnd to point to the beginning of the data for each processor (structure needed in MPI_Alltoallv */ for (i = 0, j = 0, p = 0; p < nprocs_num; p++) { ptr_toSnd[p] = i; i += nnzToSend[p]; ptr_toRcv[p] = j; j += nnzToRecv[p]; } /* ------------------------------------------------------------ PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION. Note: it uses MPI_Alltoallv. 
------------------------------------------------------------*/ if (nprocs_num > 1) { #if defined (_LONGINT) intBuf1 = (int *) SUPERLU_MALLOC(4 * nprocs_num * sizeof(int)); intBuf2 = intBuf1 + nprocs_num; intBuf3 = intBuf1 + 2 * nprocs_num; intBuf4 = intBuf1 + 3 * nprocs_num; for (p=0; p INT_MAX || ptr_toSnd[p] > INT_MAX || nnzToRecv[p] > INT_MAX || ptr_toRcv[p] > INT_MAX) ABORT("ERROR in symbfact_distributeMatrix size to send > INT_MAX\n"); intBuf1[p] = (int) nnzToSend[p]; intBuf2[p] = (int) ptr_toSnd[p]; intBuf3[p] = (int) nnzToRecv[p]; intBuf4[p] = (int) ptr_toRcv[p]; } intBuf1[iam]=0; /* This corresponds to nnzToSend[iam] */ intBuf3[iam]=0; /* This corresponds to nnzToRecv[iam] */ #else /* Default */ intBuf1 = nnzToSend; intBuf2 = ptr_toSnd; intBuf3 = nnzToRecv; intBuf4 = ptr_toRcv; i = nnzToRecv[iam]; nnzToRecv[iam] = 0; nnzToSend[iam] = 0; #endif MPI_Alltoallv (snd_aind, intBuf1, intBuf2, mpi_int_t, rcv_aind, intBuf3, intBuf4, mpi_int_t, (*num_comm)); #if defined (_LONGINT) SUPERLU_FREE (intBuf1); #else /* Default */ nnzToRecv[iam] = i; #endif } /* ------------------------------------------------------------ DEALLOCATE SEND STORAGE ------------------------------------------------------------*/ if (snd_aind) SUPERLU_FREE( snd_aind ); SUPERLU_FREE( ptr_toSnd ); /* ------------------------------------------------------------ CONVERT THE RECEIVED FORMAT INTO THE SYMBOLIC FORMAT. THIS IS PERFORMED ONLY BY NPROCS_SYMB PROCESSORS ------------------------------------------------------------*/ if (iam < nprocs_symb) { nblks_loc = VInfo->nblks_loc; begEndBlks_loc = VInfo->begEndBlks_loc; nvtcs_loc = VInfo->nvtcs_loc; /* ------------------------------------------------------------ Allocate space for storing indices of A after redistribution. 
------------------------------------------------------------*/ if (!(x_ainf = intCalloc_symbfact (nvtcs_loc + 1))) ABORT("Malloc fails for x_ainf[]."); if (!(x_asup = intCalloc_symbfact (nvtcs_loc + 1))) ABORT("Malloc fails for x_asup[]."); /* Initialize the array of columns/rows pointers */ for (i = 0, p = 0; p < nprocs_num; p++) { ainf_data = TRUE; k = 0; while (k < nnzToRecv[p]) { j = rcv_aind[i + k]; if (j == EMPTY) { ainf_data = FALSE; k ++; } else { nelts = rcv_aind[i + k]; vtx = rcv_aind[i + k + 1]; vtx_lid = LOCAL_IND( globToLoc[vtx] ); k += nelts + 2; if (ainf_data) x_ainf[vtx_lid] += nelts; else x_asup[vtx_lid] = nelts; } } i += nnzToRecv[p]; } /* copy received information */ vtx_lid = 0; for (i = 0, k = 0, j = 0; i < nblks_loc; i++) { for (vtx = begEndBlks_loc[2*i]; vtx < begEndBlks_loc[2*i+1]; vtx++, vtx_lid ++) { nelts = x_ainf[vtx_lid]; x_ainf[vtx_lid] = k; k += nelts; nelts = x_asup[vtx_lid]; x_asup[vtx_lid] = j; j += nelts; tempArray[vtx] = x_ainf[vtx_lid]; } } x_ainf[nvtcs_loc] = k; x_asup[nvtcs_loc] = j; /* Allocate space for storing indices of A after conversion */ if ( !(ind_ainf = intMalloc_symbfact(x_ainf[nvtcs_loc])) && x_ainf[nvtcs_loc] != 0 ) ABORT("Malloc fails for ind_ainf[]."); if ( !(ind_asup = intMalloc_symbfact(x_asup[nvtcs_loc])) && x_asup[nvtcs_loc] != 0) ABORT("Malloc fails for ind_asup[]."); /* Copy the data into the row/column oriented storage */ for (i = 0, p = 0; p < nprocs_num; p++) { ainf_data = TRUE; k = 0; while (k < nnzToRecv[p]) { j = rcv_aind[i + k]; if (ainf_data && j == EMPTY) { ainf_data = FALSE; k ++; } else { nelts = rcv_aind[i + k]; vtx = rcv_aind[i + k + 1]; vtx_lid = LOCAL_IND( globToLoc[vtx] ); if (ainf_data) { /* traverse ainf data */ ind = tempArray[vtx]; for (j = i + k + 2; j < i + k + 2 + nelts; j++, ind ++) ind_ainf[ind] = rcv_aind[j]; tempArray[vtx] = ind; } else { /* traverse asup data */ ind = x_asup[vtx_lid]; for (j = i + k + 2; j < i + k + 2 + nelts; j++, ind ++) ind_asup[ind] = rcv_aind[j]; } k += 
nelts + 2; } } i += nnzToRecv[p]; } /* ------------------------------------------------------------ DEALLOCATE TEMPORARY STORAGE ------------------------------------------------------------*/ SUPERLU_FREE( ptr_toRcv ); if (rcv_aind) SUPERLU_FREE( rcv_aind ); if (nnzToRecv) SUPERLU_FREE( nnzToRecv ); AS->x_ainf = x_ainf; AS->x_asup = x_asup; AS->ind_ainf = ind_ainf; AS->ind_asup = ind_asup; VInfo->nnz_asup_loc = x_asup[nvtcs_loc]; VInfo->nnz_ainf_loc = x_ainf[nvtcs_loc]; } } static float allocPrune_lvl ( Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input -local info on vertices distribution */ psymbfact_stat_t *PS /* Input -statistics */ ) /*! \brief * *
 * Allocate storage for data structures necessary for pruned graphs.
 * For those of unpredictable size, make a guess of FILL * n.
 * Return value:
 *     0 if enough memory was available;
 *     otherwise, return the amount of space intended to allocate 
 *     when memory allocation failure occurred.
 * 
*/ { int_t lword; int_t nzlmaxPr, nzumaxPr, *xlsubPr, *xusubPr, *lsubPr, *usubPr; int_t nvtcs_loc, no_expand_pr, x_sz; float alpha = 1.5; int_t FILL = sp_ienv_dist(6); nvtcs_loc = VInfo->nvtcs_loc; no_expand_pr = 0; lword = (int_t) sizeof(int_t); /* free memory allocated for the domain symbolic factorization */ if (Llu_symbfact->szLsubPr) SUPERLU_FREE( Llu_symbfact->lsubPr ); if (Llu_symbfact->szUsubPr) SUPERLU_FREE( Llu_symbfact->usubPr ); if (Llu_symbfact->xlsubPr) SUPERLU_FREE( Llu_symbfact->xlsubPr ); if (Llu_symbfact->xusubPr) SUPERLU_FREE( Llu_symbfact->xusubPr ); Llu_symbfact->xlsub_rcvd = intMalloc_symbfact (VInfo->maxSzBlk + 1); Llu_symbfact->xusub_rcvd = intMalloc_symbfact (VInfo->maxSzBlk + 1); /* allocate memory to use during superior levels of sep_tree */ x_sz = SUPERLU_MIN( VInfo->maxNvtcsNds_loc, VInfo->maxSzBlk); nzlmaxPr = 2 * FILL * VInfo->maxNvtcsNds_loc; nzumaxPr = 2 * FILL * VInfo->maxSzBlk; /* Integer pointers for L\U factors */ if (x_sz != 0) { xlsubPr = intMalloc_symbfact(VInfo->maxNvtcsNds_loc + 1); xusubPr = intMalloc_symbfact(VInfo->maxNvtcsNds_loc + 1); lsubPr = (int_t *) SUPERLU_MALLOC (nzlmaxPr * lword); usubPr = (int_t *) SUPERLU_MALLOC (nzumaxPr * lword); while ( !lsubPr || !usubPr ) { if ( lsubPr ) SUPERLU_FREE( lsubPr ); if ( usubPr ) SUPERLU_FREE( usubPr ); nzlmaxPr /= 2; nzlmaxPr = alpha * (float) nzlmaxPr; nzumaxPr /= 2; nzumaxPr = alpha * (float) nzumaxPr; if ( nzumaxPr < x_sz ) { fprintf(stderr, "Not enough memory to perform factorization.\n"); return (PS->allocMem); } lsubPr = (int_t *) SUPERLU_MALLOC(nzlmaxPr * lword); usubPr = (int_t *) SUPERLU_MALLOC(nzumaxPr * lword); ++no_expand_pr; } } else { xlsubPr = NULL; lsubPr = NULL; xusubPr = NULL; usubPr = NULL; nzlmaxPr = 0; nzumaxPr = 0; } if (VInfo->maxNvtcsNds_loc) Llu_symbfact->cntelt_vtcsA_lvl = (int_t *) SUPERLU_MALLOC (VInfo->maxNvtcsNds_loc * lword); if (PS->maxSzLPr < Llu_symbfact->indLsubPr) PS->maxSzLPr = Llu_symbfact->indLsubPr; if (PS->maxSzUPr < 
Llu_symbfact->indUsubPr) PS->maxSzUPr = Llu_symbfact->indUsubPr; Llu_symbfact->lsubPr = lsubPr; Llu_symbfact->xlsubPr = xlsubPr; Llu_symbfact->usubPr = usubPr; Llu_symbfact->xusubPr = xusubPr; Llu_symbfact->szLsubPr = nzlmaxPr; Llu_symbfact->szUsubPr = nzumaxPr; Llu_symbfact->indLsubPr = 0; Llu_symbfact->indUsubPr = 0; Llu_symbfact->no_expand_pr += no_expand_pr; return 0; } static float allocPrune_domain ( int_t fstVtx, /* Input - first vertex of current node */ int_t lstVtx, /* Input - last vertex of current node */ Llu_symbfact_t *Llu_symbfact, /* Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input -local info on vertices distribution */ psymbfact_stat_t *PS /* Input -statistics */ ) /*! \brief * *
 * Allocate storage for data structures necessary for pruned graphs.
 * For those of unpredictable size, make a guess of FILL * n.
 * Return value:
 *     0 if enough memory was available;
 *     otherwise, return the amount of space intended to allocate 
 *     when memory allocation failure occurred.
 * 
*/ { int_t lword; int_t nzlmaxPr, nzumaxPr, *xlsubPr, *xusubPr, *lsubPr, *usubPr; int_t nvtcs_loc, no_expand_pr, x_sz; float alpha = 1.5; int_t FILL = 2 * sp_ienv_dist(6); nvtcs_loc = VInfo->nvtcs_loc; no_expand_pr = 0; lword = (int_t) sizeof(int_t); /* allocate memory to use during domain_symbolic routine */ /* Guess for prune graph */ x_sz = lstVtx - fstVtx; nzlmaxPr = nzumaxPr = 2*FILL * x_sz; /* Integer pointers for L\U factors */ if (x_sz != 0) { xlsubPr = intMalloc_symbfact(x_sz+1); xusubPr = intMalloc_symbfact(x_sz+1); lsubPr = (int_t *) SUPERLU_MALLOC (nzlmaxPr * lword); usubPr = (int_t *) SUPERLU_MALLOC (nzumaxPr * lword); while ( !lsubPr || !usubPr ) { if ( lsubPr ) SUPERLU_FREE(lsubPr); if ( usubPr ) SUPERLU_FREE(usubPr); nzlmaxPr /= 2; nzlmaxPr = alpha * (float) nzlmaxPr; nzumaxPr /= 2; nzumaxPr = alpha * (float) nzumaxPr; if ( nzumaxPr < x_sz ) { fprintf(stderr, "Not enough memory to perform factorization.\n"); return (PS->allocMem); } lsubPr = (void *) SUPERLU_MALLOC(nzlmaxPr * lword); usubPr = (void *) SUPERLU_MALLOC(nzumaxPr * lword); ++no_expand_pr; } } else { xlsubPr = NULL; xusubPr = NULL; } Llu_symbfact->lsubPr = lsubPr; Llu_symbfact->xlsubPr = xlsubPr; Llu_symbfact->usubPr = usubPr; Llu_symbfact->xusubPr = xusubPr; Llu_symbfact->szLsubPr = nzlmaxPr; Llu_symbfact->szUsubPr = nzumaxPr; Llu_symbfact->indLsubPr = 0; Llu_symbfact->indUsubPr = 0; Llu_symbfact->xlsub_rcvd = NULL; Llu_symbfact->xusub_rcvd = NULL; Llu_symbfact->cntelt_vtcsA_lvl = NULL; PS->maxSzLPr = Llu_symbfact->indLsubPr; PS->maxSzUPr = Llu_symbfact->indUsubPr; Llu_symbfact->no_expand_pr = no_expand_pr; Llu_symbfact->no_expcp = 0; return 0; } /************************************************************************/ static int symbfact_alloc /************************************************************************/ ( int_t n, /* Input - order of the matrix */ int nprocs, /* Input - number of processors for the symbolic factorization */ Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t 
*Llu_symbfact, /* Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input - local info on vertices distribution */ comm_symbfact_t *CS, /* Input -information on communication */ psymbfact_stat_t *PS /* Input -statistics */ ) /*! \brief * *
 * Allocate storage for the data structures common to symbolic factorization
 * routines. For those of unpredictable size, make a guess of FILL * nnz(A).
 * Return value:
 *     0 if enough memory was available;
 *     otherwise, return the amount of space intended to allocate 
 *     when memory allocation failure occurred.
 * 
*/
{
  /* Allocates the L/U index arrays (initial guess FILL * nnz_a_loc + 1
   * entries each) and the communication arrays of CS.  When lsub/usub
   * cannot be allocated, both guesses are scaled by alpha/2 and the
   * allocation is retried; once nzumax falls below nnz_a_loc/2 the routine
   * gives up, releases what it allocated and returns PS->allocMem. */
  int nlvls, p; /* no of levels in the separator tree */
  int_t lword, no_expand;
  int_t *xsup, *supno;
  int_t *lsub, *xlsub;
  int_t *usub, *xusub;
  int_t nzlmax, nzumax, nnz_a_loc;
  int_t nvtcs_loc, *cntelt_vtcs;
  float alpha = 1.5;
  int_t FILL = sp_ienv_dist(6);

  nvtcs_loc = VInfo->nvtcs_loc;
  nnz_a_loc = VInfo->nnz_ainf_loc + VInfo->nnz_asup_loc;
  nlvls = (int) LOG2( nprocs ) + 1;
  no_expand = 0;
  lword     = sizeof(int_t);

  /* Guess for L\U factors */
  nzlmax = nzumax = FILL * nnz_a_loc + 1;

  /* Integer pointers for L\U factors */
  supno   = intMalloc_symbfact(nvtcs_loc+1);
  xlsub   = intMalloc_symbfact(nvtcs_loc+1);
  xusub   = intMalloc_symbfact(nvtcs_loc+1);

  lsub = (void *) SUPERLU_MALLOC(nzlmax * lword);
  usub = (void *) SUPERLU_MALLOC(nzumax * lword);

  while ( !lsub || !usub ) {
    /* Release whichever of the two DID succeed before retrying with a
     * smaller guess (the other one is NULL and must not be freed). */
    if ( lsub ) SUPERLU_FREE(lsub);
    if ( usub ) SUPERLU_FREE(usub);

    nzlmax /= 2;     nzlmax = alpha * nzlmax;
    nzumax /= 2;     nzumax = alpha * nzumax;

    if ( nzumax < nnz_a_loc/2 ) {
      fprintf(stderr, "Not enough memory to perform factorization.\n");
      /* nothing has been handed to the output structures yet, so the
       * pointer arrays are still owned here */
      if ( supno ) SUPERLU_FREE(supno);
      if ( xlsub ) SUPERLU_FREE(xlsub);
      if ( xusub ) SUPERLU_FREE(xusub);
      return (PS->allocMem);
    }
    lsub  = (void *) SUPERLU_MALLOC(nzlmax * lword);
    usub  = (void *) SUPERLU_MALLOC(nzumax * lword);
    ++no_expand;
  }

  if (nprocs == 1)
    cntelt_vtcs = NULL;
  else
    cntelt_vtcs = intMalloc_symbfact (nvtcs_loc+1);

  /* allocate memory for communication data structures */
  CS->rcv_interLvl = intMalloc_symbfact (2 * (int_t) nprocs + 1);
  CS->snd_interLvl = intMalloc_symbfact (2 * (int_t) nprocs + 1);
  CS->ptr_rcvBuf   = intMalloc_symbfact (2 * (int_t) nprocs );
  CS->rcv_intraLvl = intMalloc_symbfact ((int_t) nprocs + 1);
  CS->snd_intraLvl = intMalloc_symbfact ((int_t) nprocs + 1);
  CS->snd_interSz  = intMalloc_symbfact ((int_t) nlvls + 1);
  CS->snd_LinterSz = intMalloc_symbfact ((int_t) nlvls + 1);
  CS->snd_vtxinter = intMalloc_symbfact ((int_t) nlvls + 1);

  /* the send/receive buffers themselves start out empty */
  CS->rcv_bufSz    = 0;
  CS->rcv_buf      = NULL;
  CS->snd_bufSz    = 0;
  CS->snd_buf      = NULL;

  /* mark the first nprocs entries of the per-process arrays as EMPTY */
  for (p = 0; p < nprocs; p++) {
    CS->rcv_interLvl[p] = EMPTY;
    CS->snd_interLvl[p] = EMPTY;
    CS->rcv_intraLvl[p] = EMPTY;
    CS->snd_intraLvl[p] = EMPTY;
  }

  /* per-level arrays are initialised by the loop that follows */
  for
(p = 0; p <= nlvls; p++) { CS->snd_vtxinter[p] = EMPTY; CS->snd_interSz[p] = 0; CS->snd_LinterSz[p] = 0; } Pslu_freeable->supno_loc = supno; Llu_symbfact->lsub = lsub; Llu_symbfact->xlsub = xlsub; Llu_symbfact->usub = usub; Llu_symbfact->xusub = xusub; Llu_symbfact->szLsub = nzlmax; Llu_symbfact->szUsub = nzumax; Llu_symbfact->cntelt_vtcs = cntelt_vtcs; Llu_symbfact->no_expand = no_expand; return SUCCES_RET; } /* SYMBFACT_ALLOC */ static int_t symbfact_vtx ( int_t n, /* Input - order of the matrix */ int iam, /* Input - my processor number */ int_t vtx, /* Input - vertex number to perform symbolic factorization */ int_t vtx_lid, /* Input - local vertex number */ int_t vtx_prid, /* Input - */ int_t computeL, /* Input - TRUE when compute column L(:,vtx) otheriwse compute row U(vtx, :) */ int domain_symb, /* Input - if TRUE, computation corresponds to the independent domain at the bottom of the separator tree */ int_t fstVtx, /* Input - first vertex of current node */ int_t lstVtx, /* Input - last vertex of current node */ int_t snrep_lid, /* local index of current supernode reprezentative */ int_t szSn, /* size of supernode with snrep_lid reprezentative */ int_t *p_next, /* next element in sub structure */ int_t *marker, int_t *sub_rcvd, /* elements of node */ int_t sub_rcvd_sz, /* size of sub to be explored */ Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */ psymbfact_stat_t *PS, int_t *p_neltsVtxInit, int_t *p_neltsVtx, int_t *p_neltsVtx_CSep, int_t *p_neltsZrVtx, int_t *p_neltsMatched, int_t mark_vtx, int_t *p_prval_curvtx, int_t vtx_bel_othSn, int_t *p_vtx_bel_mySn ) { int_t x_aind_beg, x_aind_end; int_t k, vtx_elt, ind, pr, pr_lid, mem_error, ii, jj, compRcvd; int_t *xsub, *sub, *xsubPr, *subPr, *xsub_rcvd, *xsub_src, *sub_src; int_t pr_elt, next, prval_curvtx, maxNvtcsPProc; int_t neltsVtx, neltsMatched, neltsZrVtx, 
neltsZrSn, neltsVtx_CSep; int_t neltsVtxInit, kk; int diagind, upd_lstSn; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; upd_lstSn = FALSE; diagind = FALSE; prval_curvtx = *p_prval_curvtx; neltsVtx_CSep = 0; next = *p_next; if (computeL) { xsub = Llu_symbfact->xlsub; sub = Llu_symbfact->lsub; xsub_rcvd = Llu_symbfact->xlsub_rcvd; xsubPr = Llu_symbfact->xusubPr; subPr = Llu_symbfact->usubPr; } else { xsub = Llu_symbfact->xusub; sub = Llu_symbfact->usub; xsub_rcvd = Llu_symbfact->xusub_rcvd; xsubPr = Llu_symbfact->xlsubPr; subPr = Llu_symbfact->lsubPr; } x_aind_beg = xsub[vtx_lid]; x_aind_end = xsub[vtx_lid + 1]; xsub[vtx_lid] = next; k = x_aind_beg; /* while (sub[k] != EMPTY && k < x_aind_end) { */ while (k < x_aind_end) { if (sub[k] == EMPTY) k = x_aind_end; else { vtx_elt = sub[k]; if (!computeL) if (marker[vtx_elt] == mark_vtx - 2) if (vtx_elt < prval_curvtx) prval_curvtx = vtx_elt; marker[vtx_elt] = mark_vtx; if (computeL && vtx_elt == vtx) diagind = TRUE; if (!computeL && vtx_elt == vtx) printf ("Pe[%d] ERROR diag elt in U part vtx " IFMT " dom_s %d fstV " IFMT " lstV " IFMT "\n", iam, vtx, domain_symb, fstVtx, lstVtx); else { sub[next] = vtx_elt; next ++; } if (vtx_elt < lstVtx) neltsVtx_CSep ++; k++; } } neltsVtxInit = k - x_aind_beg; PS->nops += neltsVtxInit; if (domain_symb) { if (computeL) VInfo->nnz_ainf_loc -= x_aind_end - x_aind_beg; else VInfo->nnz_asup_loc -= x_aind_end - x_aind_beg; } #ifdef TEST_SYMB printf ("compL %d vtx %d vtx_lid %d vtx_prid %d vtx_bel_othSn %d\n", computeL, vtx, vtx_lid, vtx_prid, vtx_bel_othSn); PrintInt10 ("A(:, v)", x_aind_end - x_aind_beg, &(sub[xsub[vtx_lid]])); #endif ind = xsubPr[vtx_prid]; if (vtx_bel_othSn == vtx) upd_lstSn = TRUE; while (ind != EMPTY || upd_lstSn) { if (upd_lstSn ) { upd_lstSn = FALSE; pr_lid = snrep_lid; } else { pr_lid = subPr[ind]; ind = subPr[ind - 1]; } if (!computeL) marker[vtx] = mark_vtx; if (pr_lid >= VInfo->nvtcs_loc) { compRcvd = TRUE; xsub_src = xsub_rcvd; sub_src = sub_rcvd; pr_lid -= 
VInfo->nvtcs_loc; k = xsub_src[pr_lid] + RCVD_IND; } else { compRcvd = FALSE; xsub_src = xsub; sub_src = sub; k = xsub_src[pr_lid]; } PS->nops += xsub_src[pr_lid+1] - xsub_src[pr_lid]; for (; k < xsub_src[pr_lid+1]; k++) { pr_elt = sub_src[k]; if (pr_elt >= vtx && marker[pr_elt] != mark_vtx) { /* TEST available memory */ if (next >= x_aind_end) { if (domain_symb) { if (mem_error = psymbfact_LUXpandMem (iam, n, vtx, next, 0, computeL, DOMAIN_SYMB, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); } else if (mem_error = psymbfact_LUXpand (iam, n, EMPTY, vtx, &next, 0, computeL, LL_SYMB, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); x_aind_end = xsub[vtx_lid + 1]; if (computeL) sub = Llu_symbfact->lsub; else sub = Llu_symbfact->usub; if (!compRcvd) sub_src = sub; } sub[next] = pr_elt; next ++; if (pr_elt < lstVtx) neltsVtx_CSep ++; if (computeL && pr_elt == vtx) diagind = TRUE; if (!computeL) if (marker[pr_elt] == mark_vtx - 2) if (pr_elt < prval_curvtx) prval_curvtx = pr_elt; marker[pr_elt] = mark_vtx; } } } /* Abort if the diagonal element is zero */ if (computeL && diagind == FALSE) { printf("Pe[%d] At column " IFMT ", ", iam, vtx); ABORT("ParSymbFact() encounters zero diagonal"); } neltsVtx = next - xsub[vtx_lid]; neltsZrVtx = 0; /* number of zero elements which would be introduced in the vertex */ neltsZrSn = 0; /* -"- in the supernode */ neltsMatched = 0; if (vtx != fstVtx) { for (k = xsub[snrep_lid]; k < xsub[snrep_lid+1]; k++) { vtx_elt = sub[k]; if (vtx_elt >= vtx) { if ((vtx_elt > vtx && !computeL) || (vtx_elt >= vtx && computeL)) { if (marker[vtx_elt] != mark_vtx) neltsZrVtx ++; else { neltsMatched ++; } } if (computeL && vtx_elt == vtx) *p_vtx_bel_mySn = vtx; if (!computeL && vtx_elt == vtx + 1) *p_vtx_bel_mySn = vtx + 1; } } } else { neltsMatched = neltsVtx; if (! 
computeL) for (k = xsub[vtx_lid]; k < next; k++) { vtx_elt = sub[k]; if (vtx_elt == vtx + 1) *p_vtx_bel_mySn = vtx + 1; } } *p_neltsVtxInit = neltsVtxInit; *p_neltsVtx = neltsVtx; *p_neltsVtx_CSep = neltsVtx_CSep; *p_neltsZrVtx = neltsZrVtx; *p_neltsMatched = neltsMatched; *p_next = next; *p_prval_curvtx = prval_curvtx; return SUCCES_RET; } static int_t updateRcvd_prGraph ( int_t n, /* Input - order of the matrix */ int iam, /* Input - my processor number */ int_t *sub_rcvd, /* elements of node */ int_t sub_rcvd_sz, /* Input - size of sub to be used in the update */ int_t fstVtx_toUpd, /* Input - first vertex to update */ int_t lstVtx_toUpd, /* Input - last vertex to update */ int_t pr_offset, int computeL, int_t *marker, Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input - local info on vertices distribution */ psymbfact_stat_t *PS /* marker: first elements of marker contain the nodes that will be used in the updates */ ) { int_t i, k, nelts, prVal, vtx_elt, vtx_elt_lid, ind; int_t vtx, vtx_lid, fstVtx_toUpd_lid, fstVtx_srcUpd_lid; int_t *xsub, *sub, *xsub_rcvd, *xsubPr, *subPr, szsubPr, *p_indsubPr; int_t maxNvtcsPProc, *globToLoc, mem_error; int_t nvtcs_toUpd, fstVtx_srcUpd, vtx_lid_p; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; globToLoc = Pslu_freeable->globToLoc; fstVtx_toUpd_lid = LOCAL_IND( globToLoc[fstVtx_toUpd] ); nvtcs_toUpd = lstVtx_toUpd - fstVtx_toUpd; if (computeL) { xsub = Llu_symbfact->xlsub; sub = Llu_symbfact->lsub; xsub_rcvd = Llu_symbfact->xlsub_rcvd; xsubPr = Llu_symbfact->xlsubPr; subPr = Llu_symbfact->lsubPr; p_indsubPr = &(Llu_symbfact->indLsubPr); szsubPr = Llu_symbfact->szLsubPr; } else { xsub = Llu_symbfact->xusub; sub = Llu_symbfact->usub; xsub_rcvd = Llu_symbfact->xusub_rcvd; xsubPr = Llu_symbfact->xusubPr; subPr = Llu_symbfact->usubPr; p_indsubPr = &(Llu_symbfact->indUsubPr); szsubPr = Llu_symbfact->szUsubPr; } /* count number of 
elements in transpose representation of sub_rcvd */ /* use marker to count those elements */ for (i = 0; i < nvtcs_toUpd; i++) marker[i] = 0; for (i = 0; i <= VInfo->maxSzBlk; i++) xsub_rcvd[i] = 0; i = 0; fstVtx_srcUpd = EMPTY; while (i < sub_rcvd_sz) { vtx = sub_rcvd[i + DIAG_IND]; nelts = sub_rcvd[i + NELTS_IND]; i += RCVD_IND; prVal = sub_rcvd[i]; if (fstVtx_srcUpd == EMPTY) fstVtx_srcUpd = vtx; xsub_rcvd[vtx - fstVtx_srcUpd] = i - RCVD_IND; xsub_rcvd[vtx-fstVtx_srcUpd+1] = i + nelts; for (k = i; k < i + nelts; k++) { vtx_elt = sub_rcvd[k]; if (vtx_elt > prVal) k = i + nelts; else { if (OWNER( globToLoc[vtx_elt] ) == iam) { if (vtx_elt >= fstVtx_toUpd && vtx_elt < lstVtx_toUpd) { vtx_elt_lid = LOCAL_IND( globToLoc[vtx_elt] ) - fstVtx_toUpd_lid; marker[vtx_elt_lid] ++; } } } } i += nelts; } vtx_lid = fstVtx_toUpd_lid - pr_offset; ind = 0; for (i = 0; i < nvtcs_toUpd; i++) { if (marker[i] != 0) { xsubPr[vtx_lid] = ind + 1; ind += 2* marker[i]; marker[i] = xsubPr[vtx_lid] - 1; } vtx_lid ++; } if (ind == 0) /* quick return if no update */ return 0; /* test if enough memory in usubPr array */ if (ind >= szsubPr) { if (mem_error = psymbfact_prLUXpand (iam, ind, computeL, Llu_symbfact, PS)) return (mem_error); if (computeL) subPr = Llu_symbfact->lsubPr; else subPr = Llu_symbfact->usubPr; } *p_indsubPr = ind; i = 0; while (i < sub_rcvd_sz) { vtx = sub_rcvd[i + DIAG_IND]; nelts = sub_rcvd[i + NELTS_IND]; i += RCVD_IND; prVal = sub_rcvd[i]; for (k = i; k < i + nelts; k++) { vtx_elt = sub_rcvd[k]; if (vtx_elt > prVal) k = i + nelts; else { if (OWNER( globToLoc[vtx_elt] ) == iam) { if (vtx_elt >= fstVtx_toUpd && vtx_elt < lstVtx_toUpd) { vtx_elt_lid = LOCAL_IND( globToLoc[vtx_elt] ); vtx_lid_p = vtx_elt_lid - pr_offset; vtx_elt_lid -= fstVtx_toUpd_lid; /* add vtx to structure of pruned graph */ if (marker[vtx_elt_lid] != xsubPr[vtx_lid_p] - 1) subPr[marker[vtx_elt_lid] - 2] = marker[vtx_elt_lid] + 1; subPr[marker[vtx_elt_lid] + 1] = vtx - fstVtx_srcUpd + VInfo->nvtcs_loc; 
subPr[marker[vtx_elt_lid]] = EMPTY; marker[vtx_elt_lid] += 2; } } } } i += nelts; } for (i = fstVtx_toUpd; i < nvtcs_toUpd; i++) marker[i] = 0; return 0; } static int_t update_prGraph ( int iam, int_t n, /* order of the matrix */ int_t fstVtx_blk, /* first vertex in block to factorize */ int_t lstVtx_blk, /* last vertex in block to factorize */ int_t snrep_lid, /* local index of current supernode reprezentative */ int_t pr_offset, /* offset in the indexing of prune structure */ int_t prval_cursn, /* prune value of current supernode reprezentative */ int_t xsub_snp1, /* denotes xsub[snrep_lid + 1] */ int computeL, /* Input - if 1, compute column L(:,vtx) else compute row U(vtx, :) */ Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ psymbfact_stat_t *PS ) { int_t k, mem_error; int_t kmin, kmax, ktemp, maxElt; int_t sn_elt, sn_elt_prid; int_t *globToLoc, maxNvtcsPProc; int_t *xsub, *sub, *xsubPr, *subPr; int_t *p_indsubPr, szsubPr; globToLoc = Pslu_freeable->globToLoc; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; if (computeL) { xsub = Llu_symbfact->xlsub; sub = Llu_symbfact->lsub; xsubPr = Llu_symbfact->xlsubPr; subPr = Llu_symbfact->lsubPr; p_indsubPr = &(Llu_symbfact->indLsubPr); szsubPr = Llu_symbfact->szLsubPr; } else { xsub = Llu_symbfact->xusub; sub = Llu_symbfact->usub; xsubPr = Llu_symbfact->xusubPr; subPr = Llu_symbfact->usubPr; p_indsubPr = &(Llu_symbfact->indUsubPr); szsubPr = Llu_symbfact->szUsubPr; } kmin = xsub[snrep_lid]; kmax = xsub_snp1 - 1; if (prval_cursn != n) maxElt = prval_cursn; else maxElt = EMPTY; while (kmin <= kmax) { if (prval_cursn == n) { /* compute maximum element of L(:, vtx) */ if (sub[kmin] > maxElt) maxElt = sub[kmin]; kmin ++; } else { /* Do a quicksort-type partition. 
*/ if (sub[kmax] > prval_cursn) kmax--; else if (sub[kmin] <= prval_cursn) kmin++; else { /* kmin does'nt belong to G^s(L), and kmax belongs: * interchange the two subscripts */ ktemp = sub[kmin]; sub[kmin] = sub[kmax]; sub[kmax] = ktemp; kmin ++; kmax --; } } } k = xsub[snrep_lid]; while (sub[k] <= prval_cursn && k < xsub_snp1) { sn_elt = sub[k]; if (sn_elt < lstVtx_blk) { sn_elt_prid = LOCAL_IND( globToLoc[sn_elt] ) - pr_offset; if ((*p_indsubPr) + 2 >= szsubPr) { if (mem_error = psymbfact_prLUXpand (iam, 0, computeL, Llu_symbfact, PS)) return (mem_error); if (computeL) { subPr = Llu_symbfact->lsubPr; szsubPr = Llu_symbfact->szLsubPr; } else { subPr = Llu_symbfact->usubPr; szsubPr = Llu_symbfact->szUsubPr; } } /* add krow to structure of pruned graph */ subPr[(*p_indsubPr) + 1] = snrep_lid; subPr[(*p_indsubPr)] = xsubPr[sn_elt_prid]; xsubPr[sn_elt_prid] = (*p_indsubPr) + 1; (*p_indsubPr) += 2; } if (sn_elt == maxElt) { /* move prune val in the first position */ sub[k] = sub[xsub[snrep_lid]]; sub[xsub[snrep_lid]] = sn_elt; } k ++; } return SUCCES_RET; } static int_t blk_symbfact (SuperMatrix *A, int iam, int lvl, int szSep, int ind_sizes1, int ind_sizes2, int_t *sizes, /* Input - sizes of each node in the separator tree */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ int_t fstVtx_loc, /* Input - first vertex local of the level */ int_t fstVtx_blk, int_t lstVtx_blk, int_t *lsub_rcvd, /* elements of node */ int_t lsub_rcvd_sz, /* size of sub to be explored */ int_t *usub_rcvd, int_t usub_rcvd_sz, Pslu_freeable_t *Pslu_freeable, /* global LU data structures (modified) */ Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */ comm_symbfact_t *CS, psymbfact_stat_t *PS, int_t *marker, int_t *p_mark, /* marker used to merge elements of vertices */ int_t *p_nextl, /* ptr to nextl in lsub structure */ int_t *p_nextu, /* ptr to nextu in usub 
structure */ int_t *p_neltsZr, /* no of artificial zeros introduced so far */ int_t *p_neltsTotal, /* no of nonzeros (including artificials) computed so far */ int_t *p_nsuper_loc ) { int szSep_tmp, lvl_tmp, ii, jj; int_t *xlsubPr, *xusubPr; int_t *xsup, *supno, *lsub, *xlsub, *usub, *xusub; int_t vtx_lid, vtx_prid, vtx, vtx_super, vtx_elt, maxNvtcsPProc; int_t ind, pr, pr_elt, newnext, k, vtx_elt_lid; int_t nextl, nextu, nsuper_loc, nvtcs, n, mem_error; int_t x_aind_beg, x_aind_end, i, szLp, xlsub_snp1, xusub_snp1; int_t snrep, snrep_lid, szsn, vtxp1, *globToLoc, domain_symb; int_t lstVtx, neltsCurSep, maxNeltsVtx, fstVtx_loc_lid; /* supernode relaxation parameters */ int_t neltsVtx_L, neltsZrVtx_L, neltsMatched_L, neltsVtx_CSep_L; int_t neltsVtx_U, neltsZrVtx_U, neltsMatched_U, neltsVtx_CSep_U; int_t neltsZrSn_L, neltsZrSn_U, neltsZr, neltsTotal, neltsZr_tmp, neltsTotal_tmp, neltsZrSn, neltsVtxInit_l, neltsVtxInit_u; /* next vertex belongs to current supernode pruned structure */ int_t vtx_bel_snL, vtx_bel_snU; /* marker variables */ int_t markl1_vtx, markl2_vtx, marku1_vtx, marku2_vtx; /* prune structure variables */ int_t prval_cursn, prval_curvtx, pr_offset; /* variables for comms info */ int_t neltSn_L, neltSn_U, lstVtx_tmp, stat; float relax_param, relax_seps; if (fstVtx_blk >= lstVtx_blk) return 0; /* Initializations */ supno = Pslu_freeable->supno_loc; lsub = Llu_symbfact->lsub; xlsub = Llu_symbfact->xlsub; usub = Llu_symbfact->usub; xusub = Llu_symbfact->xusub; xusubPr = Llu_symbfact->xusubPr; xlsubPr = Llu_symbfact->xlsubPr; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; globToLoc = Pslu_freeable->globToLoc; maxNeltsVtx = VInfo->maxNeltsVtx; n = A->ncol; nextl = *p_nextl; nextu = *p_nextu; neltsZr = *p_neltsZr; neltsTotal = *p_neltsTotal; nsuper_loc = *p_nsuper_loc; marku2_vtx = *p_mark; lstVtx = fstVtxSep[ind_sizes2] + sizes[ind_sizes2]; snrep = fstVtx_blk; snrep_lid = LOCAL_IND( globToLoc[fstVtx_blk] ); szsn = 1; nvtcs = lstVtx_blk - fstVtx_blk; 
prval_cursn = n; vtx_bel_snL = EMPTY; vtx_bel_snU = EMPTY; /* set up to EMPTY xlsubPr[], xusubPr[] */ if (PS->maxSzLPr < Llu_symbfact->indLsubPr) PS->maxSzLPr = Llu_symbfact->indLsubPr; if (PS->maxSzUPr < Llu_symbfact->indUsubPr) PS->maxSzUPr = Llu_symbfact->indUsubPr; for (i = 0; i < nvtcs; i++) { xlsubPr[i] = EMPTY; xusubPr[i] = EMPTY; } Llu_symbfact->indLsubPr = 0; Llu_symbfact->indUsubPr = 0; if (ind_sizes1 == 0) domain_symb = TRUE; else { domain_symb = FALSE; fstVtx_loc_lid = LOCAL_IND( globToLoc[fstVtx_loc] ); } vtx_prid = 0; vtx_lid = LOCAL_IND( globToLoc[fstVtx_blk] ); pr_offset = vtx_lid; if (lsub_rcvd != NULL) { updateRcvd_prGraph (n, iam, lsub_rcvd, lsub_rcvd_sz, fstVtx_blk, lstVtx_blk, pr_offset, 1, marker, Pslu_freeable, Llu_symbfact, VInfo, PS); updateRcvd_prGraph (n, iam, usub_rcvd, usub_rcvd_sz, fstVtx_blk, lstVtx_blk, pr_offset, 0, marker, Pslu_freeable, Llu_symbfact, VInfo, PS); } for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, vtx_lid ++, vtx_prid ++) { vtxp1 = vtx + 1; if (marku2_vtx +4 >= n) { /* reset to EMPTY marker array */ for (i = 0; i < n; i++) marker[i] = EMPTY; marku2_vtx = EMPTY; } markl1_vtx = marku2_vtx + 1; markl2_vtx = markl1_vtx + 1; marku1_vtx = markl2_vtx + 1; marku2_vtx = marku1_vtx + 1; prval_curvtx = n; /* Compute nonzero structure L(:,vtx) */ if (mem_error = symbfact_vtx (n, iam, vtx, vtx_lid, vtx_prid, 1, domain_symb, fstVtx_blk, lstVtx, snrep_lid, szsn, &nextl, marker, lsub_rcvd, lsub_rcvd_sz, Pslu_freeable, Llu_symbfact, VInfo, PS, &neltsVtxInit_l, &neltsVtx_L, &neltsVtx_CSep_L, &neltsZrVtx_L, &neltsMatched_L, markl1_vtx, &prval_curvtx, vtx_bel_snU, &vtx_bel_snL)) return (mem_error); lsub = Llu_symbfact->lsub; #ifdef TEST_SYMB PrintInt10 ("L(:, %d)", nextl - xlsub[vtx_lid], &(lsub[xlsub[vtx_lid]])); #endif /* Compute nonzero structure of U(vtx,:) */ if (mem_error = symbfact_vtx (n, iam, vtx, vtx_lid, vtx_prid, 0, domain_symb, fstVtx_blk, lstVtx, snrep_lid, szsn, &nextu, marker, usub_rcvd, usub_rcvd_sz, Pslu_freeable, 
Llu_symbfact, VInfo, PS, &neltsVtxInit_u, &neltsVtx_U, &neltsVtx_CSep_U, &neltsZrVtx_U, &neltsMatched_U, marku1_vtx, &prval_curvtx, vtx_bel_snL, &vtx_bel_snU)) return (mem_error); usub = Llu_symbfact->usub; #ifdef TEST_SYMB PrintInt10 ("U(%d, :)", nextu - xusub[vtx_lid], &(usub[xusub[vtx_lid]])); #endif /* update statistics on fill-in */ if (!domain_symb) { stat = CEILING( (neltsVtxInit_l + neltsVtxInit_u), 2); if (Llu_symbfact->cntelt_vtcsA_lvl[vtx_lid - fstVtx_loc_lid] != stat) { stat = CEILING(stat, Llu_symbfact->cntelt_vtcsA_lvl[vtx_lid - fstVtx_loc_lid]); PS->fill_pelt[0] += (float) stat; if ((float) stat > PS->fill_pelt[1]) PS->fill_pelt[1] = (float) stat; PS->fill_pelt[2] += 1.; } stat = CEILING( (neltsVtx_L + neltsVtx_U), 2); stat = CEILING( stat, Llu_symbfact->cntelt_vtcsA_lvl[vtx_lid - fstVtx_loc_lid] ); PS->fill_pelt[3] += (float) stat; if ((float) stat > PS->fill_pelt[4]) PS->fill_pelt[4] = (float) stat; PS->fill_pelt[5] += 1.; } /* compute number of artificial zeros */ neltsTotal = 0; neltsZr = 0; neltsZrSn_L = neltsVtx_L - neltsMatched_L; neltsZrSn_U = neltsVtx_U - neltsMatched_U; neltsZrSn = neltsZrVtx_L + neltsZrVtx_U + (neltsZrSn_L + neltsZrSn_U) * szsn; neltsZr_tmp = neltsZr + neltsZrSn; neltsTotal_tmp = neltsTotal + neltsZrSn + neltsVtx_L + neltsVtx_U; if (neltsTotal_tmp == 0) neltsTotal_tmp = 1; relax_param = (float) (neltsTotal_tmp - neltsZr_tmp) / neltsTotal_tmp; #ifdef TEST_SYMB printf ("[%d] vtx %d pr %d szsn %d nVtx_L %d nZrSn_L %d nZrVtx_L %d\n", iam, vtx, prval_curvtx, szsn,neltsVtx_L, neltsZrSn_L, neltsZrVtx_L); printf (" [%d] nVtx_U %d, nZrSn_U %d nZrVtx_U %d nextl %d nextu %d\n", iam, neltsVtx_U, neltsZrSn_U, neltsZrVtx_U, nextl, nextu); printf (" [%d] nZr %d nZr_tmp %d nTot %d nTot_tmp %d rel %f test %d\n\n", iam, neltsZr, neltsZr_tmp, neltsTotal, neltsTotal_tmp, relax_param, i); #endif /* Check to see if vtx belongs in the same supernode as vtx-1 */ supno[vtx_lid] = nsuper_loc; if (vtx == fstVtx_blk) { prval_cursn = prval_curvtx; 
neltsTotal += neltsVtx_L + neltsVtx_U; } else { if (maxNeltsVtx > 0) { relax_seps = (float) neltsVtx_L / (float) maxNeltsVtx; relax_seps *= (float) (neltsVtx_U+1) / (float) maxNeltsVtx; } else relax_seps = 0.0; /* check if all upper separators are dense */ if (relax_seps >= PS->relax_seps ) { VInfo->filledSep = FILLED_SEPS; *p_nextl = xlsub[vtx_lid]; *p_nextu = xusub[vtx_lid]; nsuper_loc += 1; *p_nsuper_loc = nsuper_loc; if (mem_error = dnsUpSeps_symbfact (n, iam, szSep, ind_sizes1, ind_sizes2, sizes, fstVtxSep, vtx, Llu_symbfact, Pslu_freeable, VInfo, CS, PS, p_nextl, p_nextu, p_nsuper_loc)) return (mem_error); /* set up neltsZr and neltsTotal */ vtx = lstVtx_blk; return 0; } /* if all upper separators are dense */ else { if (relax_param >= PS->relax_gen) { /* vertex belongs to the same supernode */ if (prval_cursn > prval_curvtx || prval_cursn <= vtx) prval_cursn = prval_curvtx; neltsZr = neltsZr_tmp; neltsTotal = neltsTotal_tmp; szsn ++; /* add artificial zeros at the structure of current supernode */ newnext = xlsub[snrep_lid+1]; if (neltsZrSn_L != 0) { for (k = xlsub[snrep_lid]; k < xlsub[snrep_lid+1]; k++) { vtx_elt = lsub[k]; if (vtx_elt >= vtx) marker[vtx_elt] = markl2_vtx; } for (k = xlsub[vtx_lid]; k < nextl; k++) { vtx_elt = lsub[k]; if (marker[vtx_elt] != markl2_vtx) { /* add vtx_elt to the structure of snrep */ lsub[newnext] = vtx_elt; newnext ++; marker[vtx_elt] = markl2_vtx; } } xlsub[snrep_lid+1] = newnext; } xlsub[vtx_lid] = newnext; nextl = newnext; neltsVtx_L += neltsZrVtx_L; newnext = xusub[snrep_lid+1]; if (neltsZrSn_U != 0) { for (k = xusub[snrep_lid]; k < xusub[snrep_lid+1]; k++) { vtx_elt = usub[k]; if (vtx_elt >= vtx) { if (marker[vtx_elt] == markl2_vtx) if (prval_cursn > vtx_elt && vtx_elt != vtx) prval_cursn = vtx_elt; marker[vtx_elt] = marku2_vtx; } } for (k = xusub[vtx_lid]; k < nextu; k++) { vtx_elt = usub[k]; if (marker[vtx_elt] != marku2_vtx) { /* add vtx_elt to the structure of snrep */ usub[newnext] = vtx_elt; newnext ++; if 
(marker[vtx_elt] == markl2_vtx) if (prval_cursn > vtx_elt && vtx_elt != vtx) prval_cursn = vtx_elt; marker[vtx_elt] = marku2_vtx; } } if (marker[vtxp1] == marku2_vtx) vtx_bel_snU = vtxp1; xusub[snrep_lid+1] = newnext; } xusub[vtx_lid] = newnext; nextu = newnext; neltsVtx_U += neltsZrVtx_U; } /* if ( relax_param >= PS->relax_param) */ } /* if (VInfo->filledSep != FILLED_SEPS) */ } /* if (vtx != fstVtx_blk) */ if ((relax_param < PS->relax_gen || vtx == lstVtx_blk-1) && VInfo->filledSep != FILLED_SEPS) { /* if a new supernode starts or is the last vertex */ /* vtx starts a new supernode. Note we only store the * subscript set of the first column of a supernode. */ if (marker[vtxp1] == marku1_vtx) vtx_bel_snU = vtxp1; /* build the pruned structure */ if (relax_param < PS->relax_gen && vtx == lstVtx_blk - 1 && vtx != fstVtx_blk) szLp = 2; else szLp = 1; if (vtx == fstVtx_blk) { xlsub_snp1 = nextl; xusub_snp1 = nextu; } else { xlsub_snp1 = xlsub[snrep_lid+1]; xusub_snp1 = xusub[snrep_lid+1]; } while (szLp > 0) { szLp --; #ifdef TEST_SYMB printf ("End sn %d szsn %d\n", nsuper_loc, szsn); printf ("BLD pr vtx %d snrep %d prval %d szLp %d\n", vtx, snrep, prval_cursn, szLp); #endif update_prGraph (iam, n, fstVtx_blk, lstVtx_blk, snrep_lid, pr_offset, prval_cursn, xlsub_snp1, 1, Pslu_freeable, Llu_symbfact, PS); update_prGraph (iam, n, fstVtx_blk, lstVtx_blk, snrep_lid, pr_offset, prval_cursn, xusub_snp1, 0, Pslu_freeable, Llu_symbfact, PS); #ifdef TEST_SYMB printf ("Adr lsub %p usub %p lsub %p pos %d usub %p pos %d\n", &(lsub[xlsub[snrep_lid]]), &(usub[xusub[snrep_lid]]), lsub, xlsub[snrep_lid], usub, xusub[snrep_lid]); PrintInt10 ("Lsn", xlsub_snp1 - xlsub[snrep_lid], &(lsub[xlsub[snrep_lid]])); PrintInt10 ("Usn", xusub_snp1 - xusub[snrep_lid], &(usub[xusub[snrep_lid]])); #endif if (prval_cursn >= lstVtx_blk) { neltSn_L = xlsub_snp1 - xlsub[snrep_lid]; neltSn_U = xusub_snp1 - xusub[snrep_lid]; if (ind_sizes1 != 0) { CS->snd_intraSz += neltSn_L + neltSn_U + 4; 
CS->snd_LintraSz += neltSn_L + 2;
	  }
	  if (prval_cursn >= lstVtx) {
	    /* this supernode will be send to next layers of the tree */
	    lvl_tmp = lvl;
	    ii = ind_sizes1;
	    jj = ind_sizes2;
	    szSep_tmp = szSep;
	    lstVtx_tmp = lstVtx;
	    /* walk the (ii, jj, szSep_tmp) indices through successive levels
	       until the prune value falls inside a node or szSep_tmp is 1 */
	    while (prval_cursn >= lstVtx_tmp && szSep_tmp != 1) {
	      jj = ii + szSep_tmp + (jj - ii) / 2;
	      ii += szSep_tmp;
	      lvl_tmp ++;
	      szSep_tmp = szSep_tmp / 2;
	      lstVtx_tmp = fstVtxSep[jj] + sizes[jj];
	      CS->snd_interSz[lvl_tmp] += neltSn_L + neltSn_U + 4;
	      CS->snd_LinterSz[lvl_tmp] += neltSn_L + 2;
	      if (CS->snd_vtxinter[lvl_tmp] == EMPTY)
		CS->snd_vtxinter[lvl_tmp] = snrep;
	    }
	  }
	}
	/* the current vertex becomes the representative of a new supernode */
	snrep = vtx;
	snrep_lid = vtx_lid;
	prval_cursn = prval_curvtx;
	szsn = 1;
	xlsub_snp1 = nextl;
	xusub_snp1 = nextu;
      }
      if (relax_param < PS->relax_gen) {
	neltsTotal += neltsVtx_L + neltsVtx_U;
	nsuper_loc ++;
	supno[vtx_lid] = nsuper_loc;
	if (marker[vtxp1] == marku1_vtx)
	  vtx_bel_snU = vtxp1;
	else
	  vtx_bel_snU = EMPTY;
      }
    }
    if (vtx == lstVtx_blk - 1)
      nsuper_loc ++;

    /* check if current separator is dense */
    if (!VInfo->filledSep) {
      relax_seps = (float) neltsVtx_CSep_L / (float) (lstVtx - vtx);
      relax_seps *= (float) (neltsVtx_CSep_U+1) / (float) (lstVtx - vtx);
      if (relax_seps >= PS->relax_curSep )
	VInfo->filledSep = FILLED_SEP;
    }
    maxNeltsVtx --;
  }

  /* hand the updated counters back to the caller */
  *p_mark = marku2_vtx + 1;
  *p_nextl = nextl;
  *p_nextu = nextu;
  *p_neltsZr = neltsZr;
  *p_neltsTotal = neltsTotal;
  *p_nsuper_loc = nsuper_loc;
  return 0;
}

/*
 * domain_symbfact: symbolic factorization of the vertices [fstVtx, lstVtx).
 *
 * Calls blk_symbfact() on the whole range with no received buffers
 * (lsub_rcvd / usub_rcvd are NULL, fstVtx_loc is EMPTY).  Unless that call
 * left VInfo->filledSep == FILLED_SEPS, the entries xlsub[lstVtx_lid] and
 * xusub[lstVtx_lid] are saved in VInfo->xlsub_nextLvl / xusub_nextLvl and
 * then overwritten with *p_nextl / *p_nextu.  Finally VInfo->maxNeltsVtx is
 * decreased by the number of vertices in the range.
 *
 * NOTE(review): the value returned by blk_symbfact() (nonzero on a memory
 * expansion failure) is discarded here, and this function returns void, so
 * such a failure is not reported to the caller.
 */
static void
domain_symbfact
(SuperMatrix *A,
 int   iam,          /* Input - my processor number */
 int   lvl,          /* Input - current level in the separator tree */
 int   szSep,        /* Input - size of the current separator (node) */
 int   ind_sizes1,
 int   ind_sizes2,
 int_t *sizes,       /* Input - sizes of each node in the separator tree */
 int_t *fstVtxSep,   /* Input - first vertex of each node in the tree */
 int_t fstVtx,       /* Input - first vertex of current node */
 int_t lstVtx,       /* Input - last vertex of current node */
 Pslu_freeable_t *Pslu_freeable, /* global LU data structures (modified) */
 Llu_symbfact_t *Llu_symbfact,   /* Input/Output - local L, U data structures */
 vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */
 comm_symbfact_t *CS,
 psymbfact_stat_t *PS,
 int_t *marker,
 int_t *p_mark,      /* marker used to merge elements of vertices */
 int_t *p_nextl,     /* ptr to nextl in lsub structure */
 int_t *p_nextu,     /* ptr to nextu in usub structure */
 int_t *p_neltsZr,   /* no of artificial zeros introduced so far */
 int_t *p_neltsTotal, /* no of nonzeros (including artificials) computed so far */
 int_t *p_nsuper_loc
 )
{
  /* maxNvtcsPProc is not referenced by name below; presumably the LOCAL_IND
     macro expands to an expression that uses it -- confirm before removing */
  int_t lstVtx_lid, maxNvtcsPProc;

  /* call blk_symbfact */
  blk_symbfact (A, iam, lvl, szSep, ind_sizes1, ind_sizes2, sizes, fstVtxSep,
		EMPTY, fstVtx, lstVtx, NULL, EMPTY, NULL, EMPTY,
		Pslu_freeable, Llu_symbfact, VInfo, CS, PS,
		marker, p_mark,
		p_nextl, p_nextu, p_neltsZr, p_neltsTotal, p_nsuper_loc);

  if (VInfo->filledSep != FILLED_SEPS) {
    maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc;
    /* local index one past the last vertex of the range (0 if it is empty) */
    if (fstVtx >= lstVtx)
      lstVtx_lid = 0;
    else
      lstVtx_lid = LOCAL_IND( Pslu_freeable->globToLoc[lstVtx-1] ) + 1;
    /* save the old pointer value before replacing it with the fill position */
    VInfo->xlsub_nextLvl = Llu_symbfact->xlsub[lstVtx_lid];
    Llu_symbfact->xlsub[lstVtx_lid] = *p_nextl;
    VInfo->xusub_nextLvl = Llu_symbfact->xusub[lstVtx_lid];
    Llu_symbfact->xusub[lstVtx_lid] = *p_nextu;
  }
  VInfo->maxNeltsVtx -= lstVtx - fstVtx;
}

/*! \brief
 *
 *
 * Compute counts of rows/columns of current separator.
 * cntelt_vtcs[i] is 0 when i is nonzero before current separator
 * and n when i is zero before current separator.
 *
 * Set up nvtcsLvl_loc.
 * 
*/
/*
 * Review notes for initLvl_symbfact:
 *  - use_fillcnts is unconditionally set to TRUE right after the test that
 *    computes it, so that test has no effect and the branch that sizes each
 *    vertex from cntelt_vtcs[] is never taken.
 *  - the value returned by psymbfact_LUXpandMem() is stored in mem_error but
 *    never examined, and this function returns void, so an expansion failure
 *    is not reported.
 *  - the while conditions read begEndBlks_loc[ind_blk] before testing
 *    ind_blk < 2 * nblks_loc, and begEndBlks_loc[ind_blk] is read again after
 *    the loops; presumably the array holds at least 2 * nblks_loc + 1
 *    entries -- confirm against the code that builds it.
 */
static void
initLvl_symbfact
(
 int_t n,       /* Input - order of the matrix */
 int   iam,     /* Input - my processor number */
 int_t fstVtx,  /* Input - first vertex of current node */
 int_t lstVtx,  /* Input - last vertex of current node */
 Pslu_freeable_t *Pslu_freeable,
 Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */
 vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */
 psymbfact_stat_t *PS,
 MPI_Comm ndComm,
 int_t  *marker,
 int_t  nextl,
 int_t  nextu
 )
{
  int_t *cntelt_vtcs, x_aind_beg, x_aind_end, x_aind_beg_l, x_aind_beg_u,
    nelts_asup, nelts_ainf;
  int_t nvtcsLvl_loc, fstVtx_loc, fstVtx_loc_lid, fstVtx_nextLvl;
  int_t curblk_loc, nblks_loc, ind_blk;
  int_t *lsub, *xlsub, *usub, *xusub;
  int_t *begEndBlks_loc, code_err, mem_error;
  int_t i, j, k, vtx, vtx_lid, fstVtx_blk, lstVtx_blk, vtx_elt, p, fill;
  int_t nelts, nelts_fill_l, nelts_fill_u, nelts_cnts, maxNvtcsPProc, *globToLoc;
  int_t use_fillcnts, cntelt_vtx_l, cntelt_vtx_u;
  MPI_Status status;

  fill = PS->fill_par;
  VInfo->filledSep = FALSE;

  /* Initializations */
  maxNvtcsPProc  = Pslu_freeable->maxNvtcsPProc;
  globToLoc      = Pslu_freeable->globToLoc;
  curblk_loc     = VInfo->curblk_loc;
  nblks_loc      = VInfo->nblks_loc;
  begEndBlks_loc = VInfo->begEndBlks_loc;
  cntelt_vtcs    = Llu_symbfact->cntelt_vtcs;
  lsub = Llu_symbfact->lsub;
  xlsub = Llu_symbfact->xlsub;
  usub = Llu_symbfact->usub;
  xusub = Llu_symbfact->xusub;

  /* compute nvtcsLvl_loc */
  nvtcsLvl_loc = 0;
  ind_blk = curblk_loc;
  /* begEndBlks_loc is read as (begin, end) pairs, hence the stride of 2;
     skip the local blocks that start before fstVtx */
  while (fstVtx > begEndBlks_loc[ind_blk] && ind_blk < 2 * nblks_loc) {
    ind_blk += 2;
  }
  curblk_loc = ind_blk;
  fstVtx_loc = begEndBlks_loc[ind_blk];
  /* add up the sizes of the local blocks that start before lstVtx */
  while (begEndBlks_loc[ind_blk] < lstVtx && ind_blk < 2 * nblks_loc) {
    nvtcsLvl_loc += begEndBlks_loc[ind_blk + 1] - begEndBlks_loc[ind_blk];
    ind_blk += 2;
  }
  fstVtx_nextLvl = begEndBlks_loc[ind_blk];
  VInfo->nvtcsLvl_loc = nvtcsLvl_loc;
  VInfo->curblk_loc = curblk_loc;

  fstVtx_loc_lid = LOCAL_IND( globToLoc[fstVtx_loc] );
  vtx_lid = fstVtx_loc_lid;
  x_aind_beg_l = VInfo->xlsub_nextLvl;
  x_aind_beg_u = VInfo->xusub_nextLvl;
  nelts_cnts = 0;
  nelts_fill_l = 0;
  nelts_fill_u = 0;
  ind_blk = curblk_loc;
  while (begEndBlks_loc[ind_blk] < lstVtx && ind_blk < 2 * nblks_loc) {
    fstVtx_blk = begEndBlks_loc[ind_blk];
    lstVtx_blk = begEndBlks_loc[ind_blk + 1];
    ind_blk += 2;
    /* the for loop body is the single statement below; the fill estimates
       that follow are accumulated once per block, not once per vertex */
    for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, vtx_lid ++)
      nelts_cnts += cntelt_vtcs[vtx_lid];
    nelts_fill_l += fill * (xlsub[vtx_lid] - x_aind_beg_l);
    nelts_fill_u += fill * (xusub[vtx_lid] - x_aind_beg_u);
    x_aind_beg_l = xlsub[vtx_lid];
    x_aind_beg_u = xusub[vtx_lid];
  }
  if (nvtcsLvl_loc != 0) {
    nelts_ainf = xlsub[vtx_lid] - VInfo->xlsub_nextLvl;
    nelts_asup = xusub[vtx_lid] - VInfo->xusub_nextLvl;
  }
  else {
    nelts_ainf = 0;
    nelts_asup = 0;
  }

  use_fillcnts = FALSE;
  if (nextl + nelts_cnts >= Llu_symbfact->szLsub - nelts_ainf ||
      nextu + nelts_cnts >= Llu_symbfact->szUsub - nelts_asup) {
    use_fillcnts = TRUE;
  }
  /* NOTE(review): the next assignment overrides the test above */
  use_fillcnts = TRUE;
  if (use_fillcnts) {
    /* NOTE(review): mem_error is assigned below but never checked */
    if (nextl + nelts_fill_l >= Llu_symbfact->szLsub - nelts_ainf)
      mem_error =
	psymbfact_LUXpandMem (iam, n, fstVtx, nextl, nextl + nelts_fill_l,
			      LSUB, RL_SYMB, 1, Pslu_freeable, Llu_symbfact, VInfo, PS);
    /* reload the array pointer from the structure after the possible
       expansion */
    lsub = Llu_symbfact->lsub;
    if (nextu + nelts_fill_u >= Llu_symbfact->szUsub - nelts_asup)
      mem_error =
	psymbfact_LUXpandMem (iam, n, fstVtx, nextu, nextu + nelts_fill_u,
			      USUB, RL_SYMB, 1, Pslu_freeable, Llu_symbfact, VInfo, PS);
    usub = Llu_symbfact->usub;
  }

  /* init xlsub[fstVtx:lstVtx] and xusub[fstVtx:lstVtx] and copy
     elements of A[fstVtx:lstVtx, fstVtx:lstVtx] in lsub and usub */
  fstVtx_loc_lid = LOCAL_IND( globToLoc[fstVtx_loc] );
  x_aind_beg_l = VInfo->xlsub_nextLvl;
  x_aind_beg_u = VInfo->xusub_nextLvl;
  vtx_lid = fstVtx_loc_lid;
  ind_blk = curblk_loc;
  while (begEndBlks_loc[ind_blk] < lstVtx && ind_blk < 2 * nblks_loc) {
    fstVtx_blk = begEndBlks_loc[ind_blk];
    lstVtx_blk = begEndBlks_loc[ind_blk + 1];
    ind_blk += 2;
    for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, vtx_lid ++) {
      /* for the first local vertex the start offsets come from
	 VInfo->x[lu]sub_nextLvl (set above); for the others from xlsub/xusub */
      if (vtx_lid != fstVtx_loc_lid) {
	x_aind_beg_l = xlsub[vtx_lid];
	x_aind_beg_u = xusub[vtx_lid];
      }
      if (use_fillcnts) {
	cntelt_vtx_l = fill * (xlsub[vtx_lid+1] - x_aind_beg_l);
	cntelt_vtx_u = fill * (xusub[vtx_lid+1] - x_aind_beg_u);
      }
      else {
	cntelt_vtx_l = cntelt_vtcs[vtx_lid];
	cntelt_vtx_u = cntelt_vtcs[vtx_lid];
      }
      x_aind_end = xlsub[vtx_lid + 1];
      Llu_symbfact->cntelt_vtcsA_lvl[vtx_lid - fstVtx_loc_lid] =
	CEILING( (xlsub[vtx_lid+1]-x_aind_beg_l + xusub[vtx_lid+1]-x_aind_beg_u), 2);
      xlsub[vtx_lid] = nextl;
      nelts = 0;
      /* copy the existing entries to their new position */
      for (k = x_aind_beg_l; k < x_aind_end; k++) {
	lsub[nextl] = lsub[k];
	nextl ++;
	nelts ++;
      }
      /* put EMPTY after the copied entries when spare room is reserved,
	 then skip over the reserved room */
      if (nelts < cntelt_vtx_l)
	lsub[nextl] = EMPTY;
      nextl += cntelt_vtx_l - nelts;

      x_aind_end = xusub[vtx_lid + 1];
      xusub[vtx_lid] = nextu;
      nelts = 0;
      for (k = x_aind_beg_u; k < x_aind_end; k++) {
	usub[nextu] = usub[k];
	nextu ++;
	nelts ++;
      }
      if (nelts < cntelt_vtx_u)
	usub[nextu] = EMPTY;
      nextu += cntelt_vtx_u - nelts;
    }
  }
  if (nvtcsLvl_loc == 0) {
    /* no local vertex on this level: only record the current fill
       positions at the local index following the previous block */
    if (curblk_loc == 0)
      vtx_lid = 0;
    else {
      if (begEndBlks_loc[curblk_loc-1] == 0)
	vtx_lid = 0;
      else
	vtx_lid = LOCAL_IND( globToLoc[begEndBlks_loc[curblk_loc-1] - 1] ) + 1;
    }
    xlsub[vtx_lid] = nextl;
    xusub[vtx_lid] = nextu;
  }
  else {
    VInfo->xlsub_nextLvl = xlsub[vtx_lid];
    xlsub[vtx_lid] = nextl;
    VInfo->xusub_nextLvl = xusub[vtx_lid];
    xusub[vtx_lid] = nextu;
    if (PS->estimLSz < nextl)
      PS->estimLSz = nextl;
    if (PS->estimUSz < nextu)
      PS->estimUSz = nextu;

    VInfo->nnz_ainf_loc -= nelts_ainf;
    VInfo->nnz_asup_loc -= nelts_asup;
  }
  VInfo->fstVtx_nextLvl = fstVtx_nextLvl;
}

static int_t
expand_RL
(
 int_t computeRcvd,   /* if = 1, then update from receive buffer,
			 else update from own data */
 int_t n,
 int   iam,           /* process number */
 int_t *lsub_rcvd,    /* elements of node */
 int_t lsub_rcvd_sz,  /* size of sub to be explored */
 int_t *usub_rcvd,
 int_t usub_rcvd_sz,
 int_t vtxXp,
 int_t vtx_upd_pr,    /* ind in pruned structure of upd vertex which
			 doesn't fit into the alloc memory */
 int_t lstVtx_upd_pr, /* ind in pruned structure of lst vtx to update */
 int_t fstVtx_srcUpd, /* first vertex source of the updates */
 int_t
lstVtx_srcUpd, /* last vertex source of the updates */ int_t fstVtx_toUpd, /* first vertex to update */ int_t lstVtx_toUpd, /* last vertex to update */ int_t nvtcs_toUpd, /* no of vertices to update */ int computeL, int_t *pmarkl, int_t *marker, Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */ psymbfact_stat_t *PS ) { int_t fstVtx_toUpd_lid, vtx_lid, vtx, vtx_elt, vtx_elt_lid, nextl, nelts_in; int_t i, ii, j, nelts, nelts_vtx, mpnelts, lvtx_lid, elt, vtxXp_lid; int_t *xusubPr, *usubPr, *xlsub, *lsub, *xusub, *usub; int_t markl, *globToLoc, maxNvtcsPProc; int_t mem_error, len_texp; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; globToLoc = Pslu_freeable->globToLoc; xusubPr = Llu_symbfact->xlsubPr; usubPr = Llu_symbfact->lsubPr; if (computeL) { xlsub = Llu_symbfact->xlsub; lsub = Llu_symbfact->lsub; xusub = Llu_symbfact->xusub; usub = Llu_symbfact->usub; } else { xlsub = Llu_symbfact->xusub; lsub = Llu_symbfact->usub; xusub = Llu_symbfact->xlsub; usub = Llu_symbfact->lsub; } markl = *pmarkl + 1; fstVtx_toUpd_lid = LOCAL_IND( globToLoc[fstVtx_toUpd] ); vtxXp_lid = LOCAL_IND( globToLoc[vtxXp] ); nextl = xlsub[vtxXp_lid+1]; lvtx_lid = EMPTY; if (lstVtx_srcUpd != EMPTY) lvtx_lid = LOCAL_IND( globToLoc[lstVtx_srcUpd - 1] ); /* count the number of new elements, and update Llu_symbfact->cntelt_vtcs */ vtx_lid = fstVtx_toUpd_lid; vtx_lid += vtx_upd_pr; len_texp = 0; for (i = vtx_upd_pr; i < lstVtx_upd_pr; i++, vtx_lid ++) { nelts_vtx = xlsub[vtx_lid+1] - xlsub[vtx_lid]; if (xusubPr[i] != xusubPr[i+1]) { j = xusubPr[i]; vtx = usubPr[j]; /* setup marker structure for already existing elements */ ii = xlsub[vtx_lid]; while (lsub[ii] != EMPTY && ii < xlsub[vtx_lid + 1]) { marker[lsub[ii]] = markl; ii ++; } nelts_vtx = ii - xlsub[vtx_lid]; for (j = xusubPr[i] + 1; j < xusubPr[i+1]; j++) { vtx_elt = usubPr[j]; ii = marker[vtx_elt]; if 
(computeRcvd) { nelts = lsub_rcvd[ii + NELTS_IND]; ii += RCVD_IND; mpnelts = marker[vtx_elt] + nelts + RCVD_IND; } else { vtx_elt_lid = LOCAL_IND( globToLoc[vtx_elt] ); if (vtx_elt_lid == lvtx_lid) nelts = lsub_rcvd_sz - ii; else nelts = xlsub[vtx_elt_lid+1] - xlsub[vtx_elt_lid]; mpnelts = marker[vtx_elt] + nelts; } if (!computeL) marker[vtx] = markl; for (ii; ii < mpnelts; ii++) { elt = lsub_rcvd[ii]; if (elt >= vtx) { if (marker[elt] != markl) { /* add elt to structure of vtx */ marker[elt] = markl; nelts_vtx ++; } } } } if (nelts_vtx != 0 && (nelts_vtx > xlsub[vtx_lid+1] - xlsub[vtx_lid])) { nelts_in = xlsub[vtx_lid+1] - xlsub[vtx_lid]; if (nelts_in == 0) nelts_in = 1; j = nelts_vtx / nelts_in; if (nelts_vtx % nelts_in != 0) j++; nelts_vtx = j * nelts_in; } else nelts_vtx = xlsub[vtx_lid+1] - xlsub[vtx_lid]; markl ++; if (markl == n) { /* reset marker array */ for (j = fstVtx_toUpd; j < n; j++) marker[j] = EMPTY; markl = 0; } } Llu_symbfact->cntelt_vtcs[vtx_lid] = nelts_vtx; len_texp += nelts_vtx; } for (; i < nvtcs_toUpd; i++, vtx_lid++) { nelts_vtx = xlsub[vtx_lid+1] - xlsub[vtx_lid]; Llu_symbfact->cntelt_vtcs[vtx_lid] = nelts_vtx; len_texp += nelts_vtx; } *pmarkl = markl; /* mark elements array */ for (i = xlsub[vtxXp_lid]; i < nextl; i++) { marker[lsub[i]] = markl; } nextl = xlsub[vtxXp_lid+1]; if (mem_error = psymbfact_LUXpand_RL (iam, n, vtxXp, nextl, len_texp, computeL, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); return 0; } static int_t rl_update ( int computeRcvd, /* if = 1, then update from receive buffer, else update from own data */ int_t n, int iam, /* process number */ int_t *lsub_rcvd, /* elements of node */ int_t lsub_rcvd_sz, /* size of sub to be explored */ int_t *usub_rcvd, int_t usub_rcvd_sz, int_t fstVtx_srcUpd, /* first vertex source of the updates */ int_t lstVtx_srcUpd, /* last vertex source of the updates */ int_t indBlk_srcUpd, /* block index of first vertex */ int_t fstVtx_toUpd, /* first vertex to update */ int_t 
lstVtx_toUpd, /* last vertex to update */ int_t nvtcs_toUpd, /* no of vertices to update */ int computeL, int_t *pmarkl, int_t *marker, Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */ psymbfact_stat_t *PS /* marker: first elements of marker contain the nodes that will be used in the updates */ ) { int_t i, j, k, prVal, nelts, ind, nextl, ii, mpnelts, mem_error; int_t vtx, vtx_lid, vtx_elt, vtx_elt_lid, lvtx_lid; int_t fstVtx_toUpd_lid, markl, elt, vtx_loc, ind_blk; int_t *xusubPr, *usubPr, *xlsub, *lsub, *xusub, *usub; int_t fstVtx_upd, lstVtx_upd, maxNvtcsPProc, *globToLoc; int_t fstVtx_srcUpd_lid, nelts_vtx, expand; /* quick return */ if (fstVtx_toUpd >= lstVtx_toUpd) return 0; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; globToLoc = Pslu_freeable->globToLoc; fstVtx_upd = EMPTY; lstVtx_upd = EMPTY; xusubPr = Llu_symbfact->xlsubPr; usubPr = Llu_symbfact->lsubPr; if (computeL) { xlsub = Llu_symbfact->xlsub; lsub = Llu_symbfact->lsub; xusub = Llu_symbfact->xusub; usub = Llu_symbfact->usub; } else { xlsub = Llu_symbfact->xusub; lsub = Llu_symbfact->usub; xusub = Llu_symbfact->xlsub; usub = Llu_symbfact->lsub; } markl = *pmarkl; fstVtx_toUpd_lid = LOCAL_IND( globToLoc[fstVtx_toUpd] ); /* count number of elements in transpose representation of usub_rcvd */ /* use marker to count those elements */ for (i = 0; i < nvtcs_toUpd; i++) marker[i] = 0; i = 0; if (fstVtx_srcUpd != EMPTY) { fstVtx_srcUpd_lid = LOCAL_IND( globToLoc[fstVtx_srcUpd] ); vtx_lid = fstVtx_srcUpd_lid; } lvtx_lid = EMPTY; if (lstVtx_srcUpd != EMPTY) lvtx_lid = LOCAL_IND( globToLoc[lstVtx_srcUpd - 1] ); while (i < usub_rcvd_sz) { if (computeRcvd) { vtx = usub_rcvd[i + DIAG_IND]; nelts = usub_rcvd[i + NELTS_IND]; i += RCVD_IND; } else { if (vtx_lid == lvtx_lid) nelts = usub_rcvd_sz - i; else nelts = xusub[vtx_lid + 1] - xusub[vtx_lid]; vtx_lid ++; } prVal = 
usub_rcvd[i]; for (k = i; k < i + nelts; k++) { vtx_elt = usub_rcvd[k]; if (vtx_elt > prVal) k = i + nelts; else { if (OWNER( globToLoc[vtx_elt] ) == iam) { if (vtx_elt >= fstVtx_toUpd && vtx_elt < lstVtx_toUpd) { vtx_elt_lid = LOCAL_IND( globToLoc[vtx_elt] ) - fstVtx_toUpd_lid; marker[vtx_elt_lid] ++; } } } } i += nelts; } ind = 0; for (i = 0; i < nvtcs_toUpd; i++) { if (marker[i] != 0) { marker[i] ++; if (fstVtx_upd == EMPTY) fstVtx_upd = i; lstVtx_upd = i; } xusubPr[i] = ind; ind += marker[i]; marker[i] = xusubPr[i]; } xusubPr[i] = ind; lstVtx_upd ++; if (ind == 0) /* quick return if no update */ return 0; /* test if enough memory in usubPr array */ if (ind > Llu_symbfact->szLsubPr) { if (mem_error = psymbfact_prLUXpand (iam, ind, LSUB_PR, Llu_symbfact, PS)) return (mem_error); usubPr = Llu_symbfact->lsubPr; } i = 0; if (fstVtx_srcUpd != EMPTY) { vtx_loc = fstVtx_srcUpd; vtx_lid = LOCAL_IND( globToLoc[vtx_loc] ); ind_blk = indBlk_srcUpd; } while (i < usub_rcvd_sz) { if (computeRcvd) { vtx = usub_rcvd[i + DIAG_IND]; nelts = usub_rcvd[i + NELTS_IND]; i += RCVD_IND; } else { vtx = vtx_loc; if (vtx_lid == lvtx_lid) nelts = usub_rcvd_sz - i; else nelts = xusub[vtx_lid + 1] - xusub[vtx_lid]; vtx_lid ++; vtx_loc ++; if (ind_blk != EMPTY) if (vtx_loc == VInfo->begEndBlks_loc[ind_blk+1]) { ind_blk += 2; vtx_loc = VInfo->begEndBlks_loc[ind_blk]; } } prVal = usub_rcvd[i]; for (k = i; k < i + nelts; k++) { vtx_elt = usub_rcvd[k]; if (vtx_elt > prVal) k = i + nelts; else { if (OWNER( globToLoc[vtx_elt]) == iam) { if (vtx_elt >= fstVtx_toUpd && vtx_elt < lstVtx_toUpd) { vtx_elt_lid = LOCAL_IND( globToLoc[vtx_elt] ) - fstVtx_toUpd_lid; /* add vtx_elt to the pruned structure */ if (marker[vtx_elt_lid] == xusubPr[vtx_elt_lid]) { usubPr[marker[vtx_elt_lid]] = vtx_elt; marker[vtx_elt_lid] ++; } usubPr[marker[vtx_elt_lid]] = vtx; marker[vtx_elt_lid] ++; } } } } i += nelts; } /* reset marker array */ for (i = 0; i < nvtcs_toUpd; i++) marker[i] = EMPTY; if (fstVtx_srcUpd != EMPTY) { 
vtx_loc = fstVtx_srcUpd; vtx_lid = LOCAL_IND( globToLoc[vtx_loc] ); ind_blk = indBlk_srcUpd; } i = 0; while (i < lsub_rcvd_sz) { if (computeRcvd) { vtx = lsub_rcvd[i + DIAG_IND]; nelts = lsub_rcvd[i + NELTS_IND]; marker[vtx] = i; i += RCVD_IND; } else { vtx = vtx_loc; if (vtx_lid == lvtx_lid) nelts = lsub_rcvd_sz - i; else nelts = xlsub[vtx_lid + 1] - xlsub[vtx_lid]; vtx_lid ++; marker[vtx] = i; vtx_loc ++; if (ind_blk != EMPTY) if (vtx_loc == VInfo->begEndBlks_loc[ind_blk+1]) { ind_blk += 2; vtx_loc = VInfo->begEndBlks_loc[ind_blk]; } } i += nelts; } /* use the pruned structure to update symbolic factorization */ vtx_lid = fstVtx_toUpd_lid; vtx_lid += fstVtx_upd; for (i = fstVtx_upd; i < lstVtx_upd; i++, vtx_lid ++) { if (xusubPr[i] != xusubPr[i+1]) { j = xusubPr[i]; vtx = usubPr[j]; /* setup marker structure for already existing elements */ ii = xlsub[vtx_lid]; while (lsub[ii] != EMPTY && ii < xlsub[vtx_lid + 1]) { marker[lsub[ii]] = markl; ii ++; } PS->nops += ii - xlsub[vtx_lid]; nextl = ii; for (j = xusubPr[i] + 1; j < xusubPr[i+1]; j++) { vtx_elt = usubPr[j]; ii = marker[vtx_elt]; if (computeRcvd) { nelts = lsub_rcvd[ii + NELTS_IND]; ii += RCVD_IND; mpnelts = marker[vtx_elt] + nelts + RCVD_IND; } else { vtx_elt_lid = LOCAL_IND( globToLoc[vtx_elt] ); if (vtx_elt_lid == lvtx_lid) nelts = lsub_rcvd_sz - ii; else nelts = xlsub[vtx_elt_lid+1] - xlsub[vtx_elt_lid]; mpnelts = marker[vtx_elt] + nelts; } if (!computeL) marker[vtx] = markl; PS->nops += mpnelts - ii; for (ii; ii < mpnelts; ii++) { elt = lsub_rcvd[ii]; if (elt >= vtx) { if (marker[elt] != markl) { /* add elt to structure of vtx */ if (nextl >= xlsub[vtx_lid + 1]) { if (mem_error = expand_RL (computeRcvd, n, iam, lsub_rcvd, lsub_rcvd_sz, usub_rcvd, usub_rcvd_sz, vtx, i, lstVtx_upd, fstVtx_srcUpd, lstVtx_srcUpd, fstVtx_toUpd, lstVtx_toUpd, nvtcs_toUpd, computeL, &markl, marker, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); if (computeL) { lsub = Llu_symbfact->lsub; if (!computeRcvd) 
lsub_rcvd = &(Llu_symbfact->lsub[Llu_symbfact->xlsub[fstVtx_srcUpd_lid]]); } else { marker[vtx] = markl; lsub = Llu_symbfact->usub; if (!computeRcvd) lsub_rcvd = &(Llu_symbfact->usub[Llu_symbfact->xusub[fstVtx_srcUpd_lid]]); } } lsub[nextl] = elt; nextl ++; marker[elt] = markl; } } } } if (nextl < xlsub[vtx_lid+1]) lsub[nextl] = EMPTY; markl ++; if (markl == n) { /* reset marker array */ for (j = fstVtx_toUpd; j < n; j++) marker[j] = EMPTY; markl = 0; } } } *pmarkl = markl; return 0; } static int_t dnsUpSeps_symbfact ( int_t n, int iam, /* my processor number */ int szSep, int ind_sizes1, int ind_sizes2, int_t *sizes, /* Input - sizes of each node in the separator tree */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ int_t fstVtx_dns, Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ Pslu_freeable_t *Pslu_freeable, vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */ comm_symbfact_t *CS, psymbfact_stat_t *PS, int_t *p_nextl, /* ptr to nextl in lsub structure */ int_t *p_nextu, /* ptr to nextu in usub structure */ int_t *p_nsuper_loc ) { int_t nextl, nextu, nsuper_loc, curblk_loc, mem_error; int_t vtx_elt, ind_blk, vtx, k; int_t *xlsub, *xusub, *lsub, *usub; int_t fstVtx_blk, fstVtx_blk_lid, vtx_lid, lstVtx_blk, fstVtx_lvl, lstVtx_lvl; int_t *globToLoc, maxNvtcsPProc; /* Initialization */ xlsub = Llu_symbfact->xlsub; lsub = Llu_symbfact->lsub; xusub = Llu_symbfact->xusub; usub = Llu_symbfact->usub; globToLoc = Pslu_freeable->globToLoc; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; nextl = *p_nextl; nextu = *p_nextu; nsuper_loc = *p_nsuper_loc; curblk_loc = VInfo->curblk_loc; VInfo->nnz_ainf_loc = 0; VInfo->nnz_asup_loc = 0; if (fstVtx_dns == EMPTY) fstVtx_blk = VInfo->begEndBlks_loc[curblk_loc]; else fstVtx_blk = fstVtx_dns; if (fstVtx_blk == n) return 0; fstVtx_blk_lid = LOCAL_IND( globToLoc[fstVtx_blk] ); vtx_lid = fstVtx_blk_lid; xlsub[vtx_lid] = nextl; xusub[vtx_lid] = nextu; 
PS->nDnsUpSeps = 0; while (szSep >= 1) { PS->nDnsUpSeps++; fstVtx_lvl = fstVtxSep[ind_sizes2]; lstVtx_lvl = fstVtxSep[ind_sizes2] + sizes[ind_sizes2]; if (fstVtx_blk > fstVtx_lvl) vtx_elt = fstVtx_blk; else vtx_elt = fstVtx_lvl; if (nextl + lstVtx_lvl - vtx_elt >= Llu_symbfact->szLsub) { if (mem_error = psymbfact_LUXpandMem (iam, n, fstVtx_blk, nextl, nextl + fstVtx_lvl - vtx_elt, LSUB, DNS_UPSEPS, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); lsub = Llu_symbfact->lsub; } if (nextu + lstVtx_lvl - vtx_elt >= Llu_symbfact->szUsub) { if (mem_error = psymbfact_LUXpandMem (iam, n, fstVtx_blk, nextu, nextu + fstVtx_lvl - vtx_elt, LSUB, DNS_UPSEPS, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); usub = Llu_symbfact->usub; } PS->nops += 2 * (lstVtx_lvl - vtx_elt); for (; vtx_elt < lstVtx_lvl; vtx_elt++) { lsub[nextl] = vtx_elt; nextl++; usub[nextu] = vtx_elt; nextu++; } ind_sizes2 = ind_sizes1 + szSep + (ind_sizes2 - ind_sizes1) / 2; ind_sizes1 += szSep; szSep = szSep / 2; } /* delete the diagonal element from the U structure */ usub[xusub[fstVtx_blk_lid]] = usub[nextu - 1]; nextu --; xlsub[fstVtx_blk_lid+1] = nextl; xusub[fstVtx_blk_lid+1] = nextu; vtx_lid = fstVtx_blk_lid; ind_blk = curblk_loc; while (ind_blk < 2 * VInfo->nblks_loc) { if (ind_blk != curblk_loc) { fstVtx_blk = VInfo->begEndBlks_loc[ind_blk]; xlsub[vtx_lid] = nextl; xusub[vtx_lid] = nextu; for (k = xlsub[fstVtx_blk_lid]; k < xlsub[fstVtx_blk_lid+1]; k++) if (lsub[k] >= fstVtx_blk) { lsub[nextl] = lsub[k]; nextl ++; if (nextl >= MEM_LSUB( Llu_symbfact, VInfo )) if (mem_error = psymbfact_LUXpandMem (iam, n, fstVtx_blk, nextl, 0, LSUB, DNS_UPSEPS, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); lsub = Llu_symbfact->lsub; } for (k = xusub[fstVtx_blk_lid]; k < xusub[fstVtx_blk_lid+1]; k++) if (usub[k] > fstVtx_blk) { usub[nextu] = usub[k]; nextu ++; if (nextu >= MEM_USUB( Llu_symbfact, VInfo )) if (mem_error = psymbfact_LUXpandMem (iam, n, fstVtx_blk, nextu, 0, 
USUB, DNS_UPSEPS, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); usub = Llu_symbfact->usub; } PS->nops += xlsub[fstVtx_blk_lid+1] - xlsub[fstVtx_blk_lid]; PS->nops += xusub[fstVtx_blk_lid+1] - xusub[fstVtx_blk_lid]; } lstVtx_blk = VInfo->begEndBlks_loc[ind_blk + 1]; for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, vtx_lid++) { Pslu_freeable->supno_loc[vtx_lid] = nsuper_loc; if (vtx > fstVtx_blk) { xlsub[vtx_lid] = nextl; xusub[vtx_lid] = nextu; } } ind_blk += 2; nsuper_loc ++; } *p_nextl = nextl; *p_nextu = nextu; *p_nsuper_loc = nsuper_loc; /* VInfo->curblk_loc = ind_blk; */ return 0; } static int_t dnsCurSep_symbfact ( int_t n, /* Input - order of the matrix */ int iam, /* Input - my processor number */ int ind_sizes1, int ind_sizes2, int_t *sizes, /* Input - sizes of each node in the separator tree */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ int szSep, int npNode, int_t rcvd_dnsSep, int_t *p_nextl, int_t *p_nextu, int_t *p_mark, int_t *p_nsuper_loc, int_t *marker, /* temporary array of size n */ MPI_Comm ndCom, Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ Pslu_freeable_t *Pslu_freeable, vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */ comm_symbfact_t *CS, psymbfact_stat_t *PS ) { int_t fstVtx_blk, fstVtx_dns, fstVtx_dns_lid, lstVtx_blk, fstVtx, lstVtx, lstVtx_dns_lid; int_t ind_blk, i, vtx, vtx_lid, vtx_lid_x, nvtcs_upd, save_cnt, mem_error; int_t computeL, computeU, vtx_elt, j, cur_blk, snlid, snrep; int_t *sub, *xsub, *minElt_vtx, *cntelt_vtcs; int_t mark, next, *x_newelts, *x_newelts_L, *x_newelts_U; int_t *newelts_L, *newelts_U, *newelts; int_t *globToLoc, maxNvtcsPProc, lvl; int_t prval, kmin, kmax, maxElt, ktemp, prpos; float mem_dnsCS; if (!rcvd_dnsSep) VInfo->curblk_loc += 2; computeL = TRUE; computeU = TRUE; lstVtx_dns_lid = EMPTY; globToLoc = Pslu_freeable->globToLoc; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; fstVtx = 
fstVtxSep[ind_sizes2]; lstVtx = fstVtx + sizes[ind_sizes2]; cur_blk = VInfo->curblk_loc; fstVtx_dns = VInfo->begEndBlks_loc[cur_blk]; fstVtx_dns_lid = LOCAL_IND( globToLoc[fstVtx_dns] ); lvl = (int_t) LOG2( npNode ); x_newelts_U = NULL; newelts_L = NULL; newelts_U = NULL; mem_dnsCS = 0.; PS->nDnsCurSep ++; if (CS->rcv_bufSz > n - fstVtx_dns) minElt_vtx = CS->rcv_buf; else { if (!(minElt_vtx = intMalloc_symbfact(n - fstVtx_dns))) ABORT("Malloc fails for minElt_vtx[]."); mem_dnsCS += n - fstVtx_dns; } while (computeL || computeU) { if (computeL) { sub = Llu_symbfact->lsub; xsub = Llu_symbfact->xlsub; x_newelts = Llu_symbfact->cntelt_vtcs; x_newelts_L = x_newelts; } else { sub = Llu_symbfact->usub; xsub = Llu_symbfact->xusub; } /* use minElt_vtx to determine starting vertex of each nonzero element */ for (i = 0; i < n - fstVtx_dns; i++) minElt_vtx[i] = n; ind_blk = cur_blk; vtx_lid = fstVtx_dns_lid; nvtcs_upd = 0; while (VInfo->begEndBlks_loc[ind_blk] < lstVtx && ind_blk < 2 * VInfo->nblks_loc) { fstVtx_blk = VInfo->begEndBlks_loc[ind_blk]; lstVtx_blk = VInfo->begEndBlks_loc[ind_blk + 1]; ind_blk += 2; nvtcs_upd += lstVtx_blk - fstVtx_blk; for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, vtx_lid++) { j = xsub[vtx_lid]; while (j < xsub[vtx_lid+1] && sub[j] != EMPTY) { PS->nops ++; vtx_elt = sub[j] - fstVtx_dns; if (minElt_vtx[vtx_elt] == n) { minElt_vtx[vtx_elt] = vtx; } j ++; } } } if (!computeL) { if (!(x_newelts_U = intMalloc_symbfact(nvtcs_upd + 1))) ABORT("Malloc fails for x_newelts_U[]."); mem_dnsCS += nvtcs_upd + 1; x_newelts = x_newelts_U; } else { /* save the value in cntelt_vtcs[lstVtx_blk_lid] */ save_cnt = x_newelts[vtx_lid]; lstVtx_dns_lid = vtx_lid; } MPI_Allreduce (&(minElt_vtx[lstVtx - fstVtx_dns]), &(marker[lstVtx]), n - lstVtx, mpi_int_t, MPI_MIN, ndCom); #if ( PRNTlevel>=1 ) PS->no_msgsCol += (float) (2 * (int_t) LOG2( npNode )); PS->sz_msgsCol += (float) (n - lstVtx); if (PS->maxsz_msgCol < n - lstVtx) PS->maxsz_msgCol = n - lstVtx; #endif /* use 
x_newelts to determine counts of elements starting in each vertex */ for (vtx_lid = 0; vtx_lid < nvtcs_upd; vtx_lid++) x_newelts[vtx_lid] = 0; for (vtx = lstVtx; vtx < n; vtx++) { if (marker[vtx] != n) { vtx_elt = marker[vtx]; if (OWNER( globToLoc[vtx_elt] ) == iam) { x_newelts[ LOCAL_IND( globToLoc[vtx_elt] ) - fstVtx_dns_lid ] ++; } else { /* find the first vertex > vtx_elt which belongs to iam */ ind_blk = cur_blk; vtx_lid = 0; while (vtx_elt > VInfo->begEndBlks_loc[ind_blk] && ind_blk < 2 * VInfo->nblks_loc) { vtx_lid += VInfo->begEndBlks_loc[ind_blk+1] - VInfo->begEndBlks_loc[ind_blk]; ind_blk += 2; } if (VInfo->begEndBlks_loc[ind_blk] < lstVtx) { x_newelts[vtx_lid] ++; marker[vtx] = VInfo->begEndBlks_loc[ind_blk]; } else marker[vtx] = n; } } } /* set up beginning of new elements for each local vtx */ i = 0; for (vtx_lid = 0; vtx_lid < nvtcs_upd; vtx_lid++) { j = x_newelts[vtx_lid]; x_newelts[vtx_lid] = i; i += j; } x_newelts[vtx_lid] = i; newelts = NULL; if (i != 0) { if (!(newelts = intMalloc_symbfact(x_newelts[vtx_lid]))) ABORT("Malloc fails for newelts[]."); mem_dnsCS += x_newelts[vtx_lid]; for (vtx = lstVtx; vtx < n; vtx++) { if (marker[vtx] != n) { vtx_elt = marker[vtx]; vtx_lid = LOCAL_IND( globToLoc[vtx_elt] ) - fstVtx_dns_lid; newelts[x_newelts[vtx_lid]] = vtx; x_newelts[vtx_lid] ++; } } } /* reset beginning of new elements for each local vertex */ i = 0; for (vtx_lid = 0; vtx_lid < nvtcs_upd; vtx_lid++) { j = x_newelts[vtx_lid]; x_newelts[vtx_lid] = i; i = j; } if (computeL == TRUE) { computeL = FALSE; newelts_L = newelts; } else { computeU = FALSE; newelts_U = newelts; } } for (i = fstVtx_dns; i < n; i++) marker[i] = EMPTY; mark = 0; /* update vertices */ prval = n; ind_blk = cur_blk; fstVtx_dns = VInfo->begEndBlks_loc[ind_blk]; vtx_lid = LOCAL_IND( globToLoc[fstVtx_dns] ); while (VInfo->begEndBlks_loc[ind_blk] < lstVtx && ind_blk < 2 * VInfo->nblks_loc) { fstVtx_blk = VInfo->begEndBlks_loc[ind_blk]; lstVtx_blk = VInfo->begEndBlks_loc[ind_blk + 1]; 
ind_blk += 2; for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, vtx_lid++) { vtx_lid_x = vtx_lid - fstVtx_dns_lid; Llu_symbfact->xlsub[vtx_lid] = *p_nextl; Llu_symbfact->xusub[vtx_lid] = *p_nextu; if (vtx == fstVtx_blk || x_newelts_L[vtx_lid_x+1] != x_newelts_L[vtx_lid_x] || x_newelts_U[vtx_lid_x+1] != x_newelts_U[vtx_lid_x]) { /* a new supernode starts */ snlid = vtx_lid; snrep = vtx; if (mark + 2 > n) { /* reset to EMPTY marker array */ for (i = 0; i < n; i++) marker[i] = EMPTY; mark = 0; } computeL = TRUE; computeU = FALSE; while (computeL || computeU) { if (computeL) { sub = Llu_symbfact->lsub; xsub = Llu_symbfact->xlsub; x_newelts = x_newelts_L; newelts = newelts_L; next = *p_nextl; } else { sub = Llu_symbfact->usub; xsub = Llu_symbfact->xusub; x_newelts = x_newelts_U; newelts = newelts_U; next = *p_nextu; } xsub[vtx_lid] = next; /* TEST available memory */ j = x_newelts[vtx_lid_x+1] + lstVtx - vtx; if ((computeL && next+j >= MEM_LSUB(Llu_symbfact, VInfo)) || (computeU && next+j >= MEM_USUB(Llu_symbfact, VInfo))) { if (mem_error = psymbfact_LUXpandMem (iam, n, vtx, next, next + j, computeL, DNS_CURSEP, 1, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); if (computeL) sub = Llu_symbfact->lsub; else sub = Llu_symbfact->usub; } if (computeL) i = vtx; else i = vtx+1; while (i < lstVtx) { sub[next] = i; next ++; i ++; } PS->nops += x_newelts[vtx_lid_x+1]; for (i = 0; i < x_newelts[vtx_lid_x+1]; i++) { vtx_elt = newelts[i]; sub[next] = vtx_elt; next ++; if (computeU && vtx_elt < prval && marker[vtx_elt] == mark-1) prval = vtx_elt; marker[vtx_elt] = mark; } if (computeL) { computeL = FALSE; computeU = TRUE; *p_nextl = next; } else { computeU = FALSE; *p_nextu = next; } mark ++; } if (vtx != fstVtx_blk) (*p_nsuper_loc) ++; } /* a new supernode starts */ /* vtx belongs to the curent supernode */ Pslu_freeable->supno_loc[vtx_lid] = *p_nsuper_loc; } (*p_nsuper_loc) ++; } if (ind_blk > 0) { /* if iam owns blocks of this level */ i = *p_nextl - 
Llu_symbfact->xlsub[snlid]; j = *p_nextu - Llu_symbfact->xusub[snlid]; if (VInfo->begEndBlks_loc[ind_blk - 1] == lstVtx && i > 1 && j > 0) { /* if iam the last processor owning a block of this level */ computeL = TRUE; computeU = FALSE; /* prune the structure */ while (computeL || computeU) { if (computeL) { sub = Llu_symbfact->lsub; xsub = Llu_symbfact->xlsub; next = *p_nextl; computeL = FALSE; computeU = TRUE; } else { sub = Llu_symbfact->usub; xsub = Llu_symbfact->xusub; next = *p_nextu; computeU = FALSE; } kmin = xsub[snlid]; kmax = next - 1; if (prval != n) { maxElt = prval; while (kmin <= kmax) { /* Do a quicksort-type partition. */ if (sub[kmax] > prval) kmax--; else if (sub[kmin] <= prval) { kmin++; } else { /* kmin does'nt belong to G^s(L), and kmax belongs: * interchange the two subscripts */ ktemp = sub[kmin]; sub[kmin] = sub[kmax]; sub[kmax] = ktemp; kmin ++; kmax --; } if (sub[kmin-1] == prval) prpos = kmin - 1; } } else { maxElt = EMPTY; while (kmin <= kmax) { /* compute maximum element of L(:, vtx) */ if (sub[kmin] > maxElt) { maxElt = sub[kmin]; prpos = kmin; } kmin ++; } } ktemp = sub[xsub[snlid]]; sub[xsub[snlid]] = maxElt; sub[prpos] = ktemp; } /* setup snd_interSz information */ prval = Llu_symbfact->lsub[Llu_symbfact->xlsub[snlid]]; if (prval >= lstVtx) { /* this supernode will be send to next layers of the tree */ while (prval >= lstVtx && szSep != 1) { ind_sizes2 = ind_sizes1 + szSep + (ind_sizes2 - ind_sizes1) / 2; ind_sizes1 += szSep; lvl ++; szSep = szSep / 2; lstVtx = fstVtxSep[ind_sizes2] + sizes[ind_sizes2]; CS->snd_interSz[lvl] += i + j + 4; CS->snd_LinterSz[lvl] += i + 2; if (CS->snd_vtxinter[lvl] == EMPTY) CS->snd_vtxinter[lvl] = snrep; } } } } /* restore value in cntelt_vtcs */ if (lstVtx_dns_lid != EMPTY) Llu_symbfact->cntelt_vtcs[lstVtx_dns_lid] = save_cnt; *p_mark = mark; if (minElt_vtx != CS->rcv_buf) SUPERLU_FREE (minElt_vtx); SUPERLU_FREE (x_newelts_U); if (newelts_L) SUPERLU_FREE (newelts_L); if (newelts_U) SUPERLU_FREE 
(newelts_U); if (PS->szDnsSep < mem_dnsCS) PS->szDnsSep = mem_dnsCS; return 0; } /*! \brief
   All processors assigned to the current node must call this routine
   when VInfo->filledSep == FILLED_SEP.
   This is necessary since subsequent routines called from here use
   MPI_Allreduce among all processors assigned to the current node.
*/ static int_t denseSep_symbfact ( int rcvd_dnsSep, /* =1 if processor received info that the separator became dense, =0 if myPE determined that separator is full */ int_t n, /* Input - order of the matrix */ int iam, /* Input - my processor number */ int ind_sizes1, int ind_sizes2, int_t *sizes, /* Input - sizes of each separator in the separator tree */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ int szSep, int fstP, /* first pe affected current node */ int lstP, /* last pe affected current node */ int_t fstVtx_blkCyc, int_t nblk_loc, /* block number in the block cyclic distribution of current supernode */ int_t *p_nextl, int_t *p_nextu, int_t *p_mark, int_t *p_nsuper_loc, int_t *marker, MPI_Comm ndCom, MPI_Comm *symb_comm, /* Input - communicator for symbolic factorization */ Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ Pslu_freeable_t *Pslu_freeable, vtcsInfo_symbfact_t *VInfo, /* Input - local info on vertices distribution */ comm_symbfact_t *CS, psymbfact_stat_t *PS ) { int nprocsLvl, p, prvP, tag; int_t nmsgsToSnd, nmsgsToRcv; int_t ind_blk, mem_error; int_t *rcv_intraLvl; int_t fstVtx, lstVtx, cur_blk, lstVtx_blk, fstVtx_blk; int_t *globToLoc, maxNvtcsPProc; MPI_Status status; globToLoc = Pslu_freeable->globToLoc; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; fstVtx = fstVtxSep[ind_sizes2]; lstVtx = fstVtx + sizes[ind_sizes2]; rcv_intraLvl = CS->rcv_intraLvl; cur_blk = VInfo->curblk_loc; nprocsLvl = lstP - fstP; if (nblk_loc == 0) { nmsgsToSnd = 2; nmsgsToRcv = 1; } else { nmsgsToSnd = 1; nmsgsToRcv = 0; if (!rcvd_dnsSep) nmsgsToRcv ++; } if (iam == fstP && rcvd_dnsSep && nblk_loc == 1) nmsgsToRcv ++; /* first exchange msgs with all processors affected to current node */ ind_blk = cur_blk; while ((nmsgsToSnd || nmsgsToRcv) && VInfo->begEndBlks_loc[ind_blk] < lstVtx) { tag = (int) (tag_intraLvl + nblk_loc); if (nmsgsToSnd) { lstVtx_blk = VInfo->begEndBlks_loc[ind_blk + 1]; if (lstVtx_blk != lstVtx) 
{ p = OWNER( globToLoc[lstVtx_blk]); MPI_Send (&(rcv_intraLvl[fstP]), nprocsLvl, mpi_int_t, p, tag, (*symb_comm)); #if ( PRNTlevel>=1 ) PS->no_shmSnd += (float) 1; #endif } nmsgsToSnd --; } ind_blk += 2; nblk_loc ++; tag = tag_intraLvl + nblk_loc; fstVtx_blk = VInfo->begEndBlks_loc[ind_blk]; if (nmsgsToRcv && fstVtx_blk < lstVtx) { if (iam == fstP) tag --; prvP = OWNER( globToLoc[fstVtx_blk - 1]); MPI_Recv (&(rcv_intraLvl[fstP]), nprocsLvl, mpi_int_t, prvP, tag, (*symb_comm), &status); #if ( PRNTlevel>=1 ) PS->no_shmRcvd += (float) 1; #endif nmsgsToRcv --; } } if (VInfo->filledSep == FILLED_SEP) { if (mem_error = dnsCurSep_symbfact (n, iam, ind_sizes1, ind_sizes2, sizes, fstVtxSep, szSep, lstP - fstP, rcvd_dnsSep, p_nextl, p_nextu, p_mark, p_nsuper_loc, marker, ndCom, Llu_symbfact, Pslu_freeable, VInfo, CS, PS)) return (mem_error); } else if (rcvd_dnsSep) if (mem_error = dnsUpSeps_symbfact (n, iam, szSep, ind_sizes1, ind_sizes2, sizes, fstVtxSep, EMPTY, Llu_symbfact, Pslu_freeable, VInfo, CS, PS, p_nextl, p_nextu, p_nsuper_loc)) return (mem_error); return 0; } static int_t interLvl_symbfact ( SuperMatrix *A, /* Input - input matrix A */ int iam, /* Input - my processor number */ int lvl, /* Input - current level in the separator tree */ int szSep, /* Input - size of the current separator (node) */ int fstP, /* Input - first processor assigned to current node */ int lstP, /* Input - last processor assigned to current node */ int ind_sizes1, int ind_sizes2, int_t *sizes, /* Input - sizes of each node in the separator tree */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ int_t *p_nextl, int_t *p_nextu, int_t *p_nsuper_loc, int_t *pmark, /* mark for symbfact */ int_t *marker, /* temp array used for marking */ Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ Pslu_freeable_t *Pslu_freeable, comm_symbfact_t *CS,/* infos on communication data structures */ vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on 
vertices distribution */ psymbfact_stat_t *PS, MPI_Comm ndComm, MPI_Comm *symb_comm /* Input - communicator for symbolic factorization */ ) { MPI_Status *status; MPI_Request *request_snd, *request_rcv; int nprocsLvl, rcvdP, p, filledSep_lvl; int toSend, toSendL, toSendU; int_t *rcv_interLvl; int_t *snd_interLvl, *snd_interLvl1, *snd_interLvl2, snd_interLvlSz, snd_LinterLvlSz, snd_vtxLvl; int_t vtx_elt, update_loc, code_err; int_t *lsub, *xlsub, *usub, *xusub; int_t *lsub_rcvd, lsub_rcvd_sz, *usub_rcvd, usub_rcvd_sz; int_t n, mark, max_rcvSz; int_t nextl, nextu, ind_blk, vtx_lid, k, count, nelts, lstVtxLvl_loc, lstVtxLvl_loc_lid, mem_error; int_t fstVtx_blk, lstVtx_blk, i, j, vtx, prElt_L, prElt_U, snd_indBlk, prElt_ind; int_t fstVtxLvl_loc, nvtcsLvl_loc, maxNvtcsPProc, *globToLoc, fstVtx, lstVtx; int ind1, nprocsToRcv, nprocsToSnd, ind2, ind_l, ind_u, ij, ik; int_t req_ind, sent_msgs, req_ind_snd; int_t initInfo_loc[2], initInfo_gl[2]; /* Initialization */ n = A->ncol; fstVtx = fstVtxSep[ind_sizes2]; lstVtx = fstVtx + sizes[ind_sizes2]; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; globToLoc = Pslu_freeable->globToLoc; nprocsLvl = lstP - fstP; rcv_interLvl = CS->rcv_interLvl; snd_interLvl = CS->snd_interLvl; snd_interLvlSz = CS->snd_interSz[lvl]; snd_LinterLvlSz = CS->snd_LinterSz[lvl]; snd_vtxLvl = CS->snd_vtxinter[lvl]; fstVtxLvl_loc = VInfo->begEndBlks_loc[VInfo->curblk_loc]; nvtcsLvl_loc = VInfo->nvtcsLvl_loc; request_snd = NULL; request_rcv = NULL; status = NULL; mark = *pmark; lsub = Llu_symbfact->lsub; xlsub = Llu_symbfact->xlsub; usub = Llu_symbfact->usub; xusub = Llu_symbfact->xusub; /* snd_vtxLvl denotes the first vertex from which iam needs to send data. 
snd_interLvlSz denotes maximum size of the send data, snd_LinterLvlSz denotes send data corresponding to L part */ /* determine maximum size of receive buffer and information on filled sep */ if (snd_interLvlSz != 0) { if (snd_LinterLvlSz == 0) snd_interLvlSz = 0; if (snd_interLvlSz - snd_LinterLvlSz == 0) snd_interLvlSz = 0; } initInfo_loc[0] = snd_interLvlSz; initInfo_loc[1] = (int_t) VInfo->filledSep; MPI_Allreduce (initInfo_loc, initInfo_gl, 2, mpi_int_t, MPI_MAX, ndComm); #if ( PRNTlevel>=1 ) PS->no_msgsCol += (float) (2 * (int_t) LOG2( nprocsLvl )); PS->sz_msgsCol += 2; if (PS->maxsz_msgCol < 2) PS->maxsz_msgCol = 2; #endif max_rcvSz = initInfo_gl[0]; filledSep_lvl = (int) initInfo_gl[1]; if (filledSep_lvl == FILLED_SEPS) { /* quick return if all upper separators are dense */ if (VInfo->filledSep != FILLED_SEPS) { VInfo->filledSep = FILLED_SEPS; if (mem_error = dnsUpSeps_symbfact (n, iam, szSep, ind_sizes1, ind_sizes2, sizes, fstVtxSep, EMPTY, Llu_symbfact, Pslu_freeable, VInfo, CS, PS, p_nextl, p_nextu, p_nsuper_loc)) return (mem_error); } return 0; } if (max_rcvSz == 0) /* quick return if no communication necessary */ return 0; /* allocate data for the send buffer */ if (snd_interLvlSz) if (CS->snd_bufSz < snd_interLvlSz) { PS->maxSzBuf += snd_interLvlSz - CS->snd_bufSz; if (CS->snd_bufSz != 0) /* not first time allocate memory */ SUPERLU_FREE (CS->snd_buf); CS->snd_bufSz = snd_interLvlSz; if (!(CS->snd_buf = intMalloc_symbfact (snd_interLvlSz))) { ABORT("Malloc fails for snd_buf[]."); } } /* snd_interLvl : to which processors the data need to be send * information setup during the copy of data to be send in the buffer * rcv_interLvl : from which processors iam receives update data */ for (p = 2*fstP; p < 2*lstP; p++) snd_interLvl[p] = EMPTY; if (snd_interLvlSz == 0 && nvtcsLvl_loc == 0) { code_err = MPI_Alltoall (&(snd_interLvl[2*fstP]), 2, mpi_int_t, &(rcv_interLvl[2*fstP]), 2, mpi_int_t, ndComm); #if ( PRNTlevel>=1 ) PS->no_msgsCol += (float) (2 * 
(int_t) LOG2( nprocsLvl )); PS->sz_msgsCol += 2; if (PS->maxsz_msgCol < 2) PS->maxsz_msgCol = 2; #endif return 0; } /* in interLvlInfos, * obtain from which processors iam receives update information */ update_loc = FALSE; nextl = 0; nextu = snd_LinterLvlSz; if (snd_interLvlSz != 0) { /* copy data to be send */ /* find index block from where to send data */ ind_blk = VInfo->curblk_loc; while (snd_vtxLvl < VInfo->begEndBlks_loc[ind_blk]) { ind_blk -= 2; } snd_indBlk = ind_blk; vtx_lid = LOCAL_IND( globToLoc[snd_vtxLvl] ); for (; ind_blk < VInfo->curblk_loc; ind_blk += 2) { fstVtx_blk = VInfo->begEndBlks_loc[ind_blk]; if (ind_blk == snd_indBlk) fstVtx_blk = snd_vtxLvl; lstVtx_blk = VInfo->begEndBlks_loc[ind_blk + 1]; for (vtx = fstVtx_blk; vtx < lstVtx_blk; vtx++, vtx_lid ++) { toSendL = FALSE; toSendU = FALSE; if (xlsub[vtx_lid] != xlsub[vtx_lid+1] && xusub[vtx_lid] != xusub[vtx_lid+1]) { k = xlsub[vtx_lid]; prElt_L = lsub[k]; j = xusub[vtx_lid]; prElt_U = usub[j]; if (prElt_L >= fstVtx || prElt_U >= fstVtx) { if (prElt_L >= fstVtx) while (lsub[k] <= prElt_L && k < xlsub[vtx_lid + 1]) { vtx_elt = lsub[k]; if (vtx_elt >= fstVtx && vtx_elt < lstVtx) { p = OWNER( globToLoc[vtx_elt] ); if (p != iam) { /* vtx will be send to another processor */ snd_interLvl[2*p] = TRUE; toSendL = TRUE; } else update_loc = TRUE; } k++; } if (prElt_U >= fstVtx) while (usub[j] <= prElt_U && j < xusub[vtx_lid + 1]) { vtx_elt = usub[j]; if (vtx_elt >= fstVtx && vtx_elt < lstVtx) { p = OWNER( globToLoc[vtx_elt] ); if (p != iam) { /* vtx will be send to another processor */ snd_interLvl[2*p+1] = TRUE; toSendU = TRUE; } else update_loc = TRUE; } j ++; } if (toSendL || toSendU) { /* L(:, vtx) and U(vtx, :) will be send to processors */ CS->snd_buf[nextu + DIAG_IND] = vtx; nelts = xusub[vtx_lid+1] - xusub[vtx_lid]; CS->snd_buf[nextu + NELTS_IND] = nelts; nextu += 2; for (j = xusub[vtx_lid]; j < xusub[vtx_lid+1]; j++, nextu ++) { CS->snd_buf[nextu] = usub[j]; } CS->snd_buf[nextl + DIAG_IND] = vtx; 
nelts = xlsub[vtx_lid+1] - xlsub[vtx_lid]; CS->snd_buf[nextl + NELTS_IND] = nelts; nextl += 2; for (j = xlsub[vtx_lid]; j < xlsub[vtx_lid+1]; j++, nextl ++) { CS->snd_buf[nextl] = lsub[j]; } } } } } } lstVtxLvl_loc = vtx; lstVtxLvl_loc_lid = vtx_lid; } if (nextl == 0 || nextu - snd_LinterLvlSz == 0) { for (p = 2*fstP; p < 2*lstP; p++) snd_interLvl[p] = EMPTY; } nprocsToSnd = 0; for (p = 2*fstP; p < 2*lstP; p +=2) { if (snd_interLvl[p] != EMPTY || snd_interLvl[p+1] != EMPTY) { snd_interLvl[p] = nextl; snd_interLvl[p+1] = nextu - snd_LinterLvlSz; nprocsToSnd ++; } } MPI_Alltoall (&(snd_interLvl[2*fstP]), 2, mpi_int_t, &(rcv_interLvl[2*fstP]), 2, mpi_int_t, ndComm); #if ( PRNTlevel>=1 ) PS->no_msgsCol += (float) (2 * (int_t) LOG2( nprocsLvl )); PS->sz_msgsCol += 2 * nprocsLvl; if (PS->maxsz_msgCol < 2 * nprocsLvl) PS->maxsz_msgCol = 2 * nprocsLvl; #endif max_rcvSz = 0; nprocsToRcv = 0; for (p = 2*fstP; p < 2*lstP; p +=2) { CS->ptr_rcvBuf[p] = max_rcvSz; if (rcv_interLvl[p] != EMPTY) max_rcvSz += rcv_interLvl[p]; CS->ptr_rcvBuf[p+1] = max_rcvSz; if (rcv_interLvl[p+1] != EMPTY) max_rcvSz += rcv_interLvl[p+1]; if (rcv_interLvl[p] != EMPTY || rcv_interLvl[p+1] != EMPTY) nprocsToRcv ++; } /* allocate data for the receive buffer */ if (CS->rcv_bufSz < max_rcvSz) { PS->maxSzBuf += max_rcvSz - CS->rcv_bufSz; if (CS->rcv_bufSz != 0) /* not first time allocate memory */ SUPERLU_FREE (CS->rcv_buf); CS->rcv_bufSz = max_rcvSz; if (!(CS->rcv_buf = intMalloc_symbfact (max_rcvSz))) { ABORT("Malloc fails for rcv_buf[]."); } } /* allocate memory for status arrays */ if (nprocsToSnd) if ( !(request_snd = (MPI_Request*) SUPERLU_MALLOC(2 * nprocsToSnd * sizeof(MPI_Request)))) ABORT("Not enough memory when allocating MPI_Request"); if (nprocsToRcv) if ( !(request_rcv = (MPI_Request*) SUPERLU_MALLOC(2 * nprocsToRcv * sizeof(MPI_Request)))) ABORT("Not enough memory when allocating MPI_Request"); if (nprocsToRcv || nprocsToSnd) if ( !(status = (MPI_Status*) SUPERLU_MALLOC(2 * (lstP-fstP) * 
sizeof(MPI_Status)))) ABORT("Not enough memory when allocating MPI_Request"); /* determine if we have to send data */ i = 0; for (toSend = fstP, p = 2*fstP; p < 2*lstP; toSend++, p+=2) if (snd_interLvl[p] != EMPTY && toSend != iam) { MPI_Isend (CS->snd_buf, nextl, mpi_int_t, toSend, tag_interLvl_LData, (*symb_comm), &(request_snd[2*i])); MPI_Isend (&(CS->snd_buf[snd_LinterLvlSz]), nextu - snd_LinterLvlSz, mpi_int_t, toSend, tag_interLvl_UData, (*symb_comm), &(request_snd[2*i+1])); i++; #if ( PRNTlevel>=1 ) PS->no_msgsSnd += (float) 2; PS->sz_msgsSnd += (float) (nextl + nextu - snd_LinterLvlSz); if (PS->maxsz_msgSnd < nextl) PS->maxsz_msgSnd = nextl; if (PS->maxsz_msgSnd < nextu - snd_LinterLvlSz) PS->maxsz_msgSnd = nextu - snd_LinterLvlSz; #endif } if (update_loc) { /* use own data to update symbolic factorization */ vtx_lid = LOCAL_IND( globToLoc[snd_vtxLvl] ); lsub_rcvd = &(lsub[xlsub[vtx_lid]]); lsub_rcvd_sz = xlsub[lstVtxLvl_loc_lid] - xlsub[vtx_lid]; usub_rcvd = &(usub[xusub[vtx_lid]]); usub_rcvd_sz = xusub[lstVtxLvl_loc_lid] - xusub[vtx_lid]; mem_error = rl_update (0, n, iam, lsub_rcvd, lsub_rcvd_sz, usub_rcvd, usub_rcvd_sz, snd_vtxLvl, EMPTY, snd_indBlk, fstVtxLvl_loc, lstVtx, nvtcsLvl_loc, 1, &mark, marker, Pslu_freeable, Llu_symbfact, VInfo, PS); lsub_rcvd = &(Llu_symbfact->lsub[xlsub[vtx_lid]]); lsub_rcvd_sz = xlsub[lstVtxLvl_loc_lid] - xlsub[vtx_lid]; usub_rcvd = &(Llu_symbfact->usub[xusub[vtx_lid]]); usub_rcvd_sz = xusub[lstVtxLvl_loc_lid] - xusub[vtx_lid]; lsub = Llu_symbfact->lsub; usub = Llu_symbfact->usub; mem_error = rl_update (0, n, iam, usub_rcvd, usub_rcvd_sz, lsub_rcvd, lsub_rcvd_sz, snd_vtxLvl, EMPTY, snd_indBlk, fstVtxLvl_loc, lstVtx, nvtcsLvl_loc, 0, &mark, marker, Pslu_freeable, Llu_symbfact, VInfo, PS); lsub = Llu_symbfact->lsub; usub = Llu_symbfact->usub; } /* post non-blocking receives for all the incoming messages */ i = 0; for (rcvdP = fstP, p = 2*fstP; p < 2*lstP; rcvdP++, p += 2) if (rcv_interLvl[p] != EMPTY) { lsub_rcvd = 
&(CS->rcv_buf[CS->ptr_rcvBuf[p]]); MPI_Irecv (lsub_rcvd, rcv_interLvl[p], mpi_int_t, rcvdP, tag_interLvl_LData, (*symb_comm), &(request_rcv[i])); usub_rcvd = &(CS->rcv_buf[CS->ptr_rcvBuf[p+1]]); MPI_Irecv (usub_rcvd, rcv_interLvl[p+1], mpi_int_t, rcvdP, tag_interLvl_UData, (*symb_comm), &(request_rcv[i+1])); i += 2; #if ( PRNTlevel>=1 ) PS->no_msgsRcvd += (float) 2; PS->sz_msgsRcvd += (float) (rcv_interLvl[p] + rcv_interLvl[p+1]); if (PS->maxsz_msgRcvd < rcv_interLvl[p]) PS->maxsz_msgRcvd = rcv_interLvl[p]; if (PS->maxsz_msgRcvd < rcv_interLvl[p+1]) PS->maxsz_msgRcvd = rcv_interLvl[p+1]; #endif } /* wait until messages are received and update local data */ for (i = 0; i < nprocsToRcv; i++) { MPI_Waitany (2*nprocsToRcv, request_rcv, &ind1, status); ij = 0; for (p = fstP; p < lstP; p++) if (rcv_interLvl[2*p] != EMPTY) { if (ij <= ind1 && ind1 < ij+2) { rcvdP = p; p = lstP; if (ind1 == ij) ind2 = ij+1; else ind2 = ind1 - 1; ind_l = ij; ind_u = ij+1; } ij += 2; } MPI_Get_count (status, mpi_int_t, &ij); MPI_Wait (&(request_rcv[ind2]), status); MPI_Get_count (status, mpi_int_t, &ik); if (ind1 == ind_l) { lsub_rcvd_sz = ij; usub_rcvd_sz = ik; } else { lsub_rcvd_sz = ik; usub_rcvd_sz = ij; } lsub_rcvd = &(CS->rcv_buf[CS->ptr_rcvBuf[2*rcvdP]]); usub_rcvd = &(CS->rcv_buf[CS->ptr_rcvBuf[2*rcvdP+1]]); /* use received data to update symbolic factorization information */ mem_error = rl_update (1, n, iam, lsub_rcvd, lsub_rcvd_sz, usub_rcvd, usub_rcvd_sz, EMPTY, EMPTY, EMPTY, fstVtxLvl_loc, lstVtx, nvtcsLvl_loc, 1, &mark, marker, Pslu_freeable, Llu_symbfact, VInfo, PS); lsub = Llu_symbfact->lsub; mem_error = rl_update (1, n, iam, usub_rcvd, usub_rcvd_sz, lsub_rcvd, lsub_rcvd_sz, EMPTY, EMPTY, EMPTY, fstVtxLvl_loc, lstVtx, nvtcsLvl_loc, 0, &mark, marker, Pslu_freeable, Llu_symbfact, VInfo, PS); usub = Llu_symbfact->usub; } if (nprocsToSnd) MPI_Waitall (2*nprocsToSnd, request_snd, status); *pmark = mark; if (request_snd != NULL) SUPERLU_FREE (request_snd); if (request_rcv != NULL) 
SUPERLU_FREE (request_rcv); if (status != NULL) SUPERLU_FREE (status); return 0; } static void freeComm ( int iam, /* Input -my processor number */ int nprocs, /* Input -number of procs for the symbolic fact. */ MPI_Comm *commLvls, /* Input -communicators for the nodes in the sep tree */ MPI_Comm *symb_comm /* Input - communicator for symbolic factorization */ ) { int szSep, i, j, k; int np, npNode, fstP, lstP, ind; i = 2 * nprocs - 2; MPI_Comm_free (&(commLvls[i])); szSep = 2; i -= szSep; while (i > 0) { /* for each level in the separator tree */ npNode = nprocs / szSep; fstP = 0; /* for each node in the level */ for (j = i; j < i + szSep; j++) { lstP = fstP + npNode; if (fstP <= iam && iam < lstP) { ind = j; } fstP += npNode; } MPI_Comm_free ( &(commLvls[ind]) ); szSep *= 2; i -= szSep; } } static void createComm ( int iam, /* Input -my processor number */ int nprocs, /* Input -number of procs for the symbolic factorization */ MPI_Comm *commLvls, /* Output -communicators for the nodes in the sep tree */ MPI_Comm *symb_comm ) { int szSep, i, j, jj, k, *pranks; int np, npNode, fstP, lstP, p, code_err, ind, col, key; for (i=0; i < 2*nprocs; i++) commLvls[i] = MPI_COMM_NULL; /* Make a list of the processes in the new communicator. 
*/ pranks = (int *) SUPERLU_MALLOC( nprocs * sizeof(int) ); i = 2 * nprocs - 2; MPI_Comm_dup ((*symb_comm), &(commLvls[i])); szSep = 2; i -= szSep; while (i > 0) { /* for each level in the separator tree */ npNode = nprocs / szSep; fstP = 0; /* for each node in the level */ for (j = i; j < i + szSep; j++) { lstP = fstP + npNode; if (fstP <= iam && iam < lstP) { ind = j; key = iam - fstP; col = fstP; } fstP += npNode; } MPI_Comm_split ((*symb_comm), col, key, &(commLvls[ind]) ); szSep *= 2; i -= szSep; } SUPERLU_FREE (pranks); } static void intraLvl_symbfact ( SuperMatrix *A, /* Input - original matrix A */ int iam, /* Input - my processor number */ int lvl, /* Input - current level in the separator tree */ int szSep, /* Input - size of the current separator(node) */ int ind_sizes1, int ind_sizes2, int_t *sizes, /* Input - sizes of each node in the separator tree */ int_t *fstVtxSep, /* Input - first vertex of each node in the tree */ int fstP, /* Input - first processor assigned to current node */ int lstP, /* Input - last processor assigned to current node */ int_t fstVtx, /* Input - first vertex of current node */ int_t lstVtx, /* Input - last vertex of current node */ Pslu_freeable_t *Pslu_freeable, /* global LU data structures (modified) */ Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input/Output - local info on vertices distribution */ comm_symbfact_t *CS, psymbfact_stat_t *PS, int_t *marker, int_t *p_mark, /* marker used to merge elements of vertices */ int_t *p_nextl, /* ptr to nextl in lsub structure */ int_t *p_nextu, /* ptr to nextu in usub structure */ int_t *p_neltsZr, /* no of artificial zeros introduced so far */ int_t *p_neltsTotal, /* no of nonzeros (including artificials) computed so far */ int_t *p_nsuper_loc, MPI_Comm ndComm, MPI_Comm *symb_comm /* Input - communicator for symbolic factorization */ ) { int nprocsLvl, p, prvP, rcvP; int toSend, rcvd_prvP, index_req[2]; int_t 
fstVtx_loc_lid, fstVtx_loc, vtx, vtxLvl, curblk_loc, denseSep;
    int_t fstVtx_blk, fstVtx_blk_lid, lstVtx_blk, lstVtx_blk_lid, tag;
    int_t nvtcs_blk, xusub_end, xlsub_end, prv_fstVtx_blk;
    int_t n;
    int_t *rcv_intraLvl, *snd_intraLvl;
    int_t *lsub_rcvd, lsub_rcvd_sz, *usub_rcvd, usub_rcvd_sz;
    int_t nmsgsRcvd, nmsgsTRcv, sz_msg;
    int_t nvtcsLvl_loc, nextl, nextu, ind_blk, snd_vtxLvl, maxNeltsVtx_in;
    int_t count, vtx_loc, mem_error, lstBlkRcvd;
    int_t fstVtx_blk_loc, fstBlk, vtx_lid, prElt, nelts, j, nvtcs_toUpd;
    int_t snd_LinterLvlSz, fstVtx_blk_loc_lid, prElt_ind, maxNmsgsToRcv;
    int_t *xlsub, *xusub, *lsub, *usub;
    /* maxNvtcsPProc must exist under exactly this name: the OWNER() and
       LOCAL_IND() macros expand to expressions that reference it */
    int_t *globToLoc, maxNvtcsPProc, nblk_loc, upd_myD, r, fstVtx_blkCyc;
    int_t k, prElt_L, prElt_U, vtx_elt, fstVtx_toUpd;
    int intSzMsg;
    MPI_Status status[4];
    /* request[0]/[1] are the two receives polled with MPI_Waitany below;
       when sending, request[0] is the "number of messages" send and
       request[1..3] are the size / L data / U data sends */
    MPI_Request request[4];

    /* Initializations */
    lsub = Llu_symbfact->lsub; xlsub = Llu_symbfact->xlsub;
    usub = Llu_symbfact->usub; xusub = Llu_symbfact->xusub;
    /* max number of msgs this processor can receive during
       intraLvl_symbfact routine */
    maxNmsgsToRcv = (lstVtx - fstVtx) / VInfo->maxSzBlk + 1;
    maxNeltsVtx_in = VInfo->maxNeltsVtx;
    globToLoc = Pslu_freeable->globToLoc;
    maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc;
    n = A->ncol;
    nprocsLvl = lstP - fstP;
    rcv_intraLvl = CS->rcv_intraLvl;
    snd_intraLvl = CS->snd_intraLvl;
    nvtcsLvl_loc = VInfo->nvtcsLvl_loc;
    nmsgsTRcv = 0;
    nmsgsRcvd = 0;
    nblk_loc = 0;
    nvtcs_toUpd = nvtcsLvl_loc;
    fstVtx_blk = fstVtx;
    denseSep = FALSE;

    /* determine first vertex that belongs to fstP */
    /* (scans the level in steps of maxSzBlk; fstVtx_blkCyc stays n when
       no such vertex is found) */
    k = fstVtx;
    fstVtx_blkCyc = n;
    while (k < lstVtx && fstVtx_blkCyc == n) {
        p = OWNER( globToLoc[k] );
        if (p == fstP)
            fstVtx_blkCyc = k;
        k += VInfo->maxSzBlk;
    }

    for (p = fstP; p < lstP; p++)
        rcv_intraLvl[p] = 0;
    /* only entries 0..2 are reset; request[3] is always assigned by an
       MPI_Isend before it is waited on */
    for (r = 0; r < 3; r++)
        request[r] = MPI_REQUEST_NULL;

    fstVtx_loc = VInfo->begEndBlks_loc[VInfo->curblk_loc];
    fstVtx_loc_lid = LOCAL_IND( globToLoc[fstVtx_loc] );
    vtx = fstVtx_loc;
    if (fstVtx_loc >= fstVtx_blkCyc)
        nblk_loc = 1;

    /* one iteration per local block of this level; stops early once the
       separator has been flagged as filled (dense) */
    while (VInfo->begEndBlks_loc[VInfo->curblk_loc] < lstVtx &&
           !VInfo->filledSep) {
        CS->snd_intraSz = 0;
        CS->snd_LintraSz = 0;
        lstBlkRcvd = FALSE;
        prv_fstVtx_blk = fstVtx_blk;
        /* begEndBlks_loc holds (begin, end) pairs, hence the stride of 2 */
        fstVtx_blk = VInfo->begEndBlks_loc[VInfo->curblk_loc];
        lstVtx_blk = VInfo->begEndBlks_loc[VInfo->curblk_loc + 1];
        fstVtx_toUpd = VInfo->begEndBlks_loc[VInfo->curblk_loc + 2];
        fstVtx_blk_lid = LOCAL_IND( globToLoc[fstVtx_blk] );
        lstVtx_blk_lid = LOCAL_IND( globToLoc[lstVtx_blk - 1] + 1);
        nvtcs_blk = lstVtx_blk - fstVtx_blk;
        nvtcs_toUpd -= nvtcs_blk;
        /* placeholder until the real count arrives from prvP */
        nmsgsTRcv = n;
        VInfo->maxNeltsVtx -= fstVtx_blk - prv_fstVtx_blk;

        index_req[0] = EMPTY;
        for (r = 0; r < 3; r++)
            request[r] = MPI_REQUEST_NULL;
        if (fstVtx_blk != fstVtx) {
            /* if not the first vertex of the level */
            prvP = OWNER( globToLoc[fstVtx_blk - 1] );
            rcvd_prvP = FALSE;
            /* receive info on number messages to receive */
            tag = tag_intraLvl + nblk_loc;
            if (iam == fstP)
                tag --;
            MPI_Irecv (&(rcv_intraLvl[fstP]), nprocsLvl, mpi_int_t, prvP, tag,
                       (*symb_comm), &(request[1]));
            while (!rcvd_prvP || nmsgsRcvd < nmsgsTRcv) {
                if (index_req[0] != 1) {
                    MPI_Irecv (&sz_msg, 1, mpi_int_t,
                               MPI_ANY_SOURCE, tag_intraLvl_szMsg,
                               (*symb_comm), &(request[0]));
                    /* NOTE(review): sz_msg is tested here right after the
                       receive is posted, i.e. before MPI_Waitany reports
                       it complete; the value examined is whatever sz_msg
                       held previously - confirm whether the check belongs
                       after the wait */
                    if (sz_msg > INT_MAX)
                        ABORT("ERROR in intraLvl_symbfact size to send > INT_MAX\n");
                }
                MPI_Waitany (2, request, index_req, status);
                if (index_req[0] == 1) {
                    /* receive information on no msgs to receive */
#if ( PRNTlevel>=1 )
                    PS->no_shmRcvd ++;
#endif
                    rcvd_prvP = TRUE;
                    nmsgsTRcv = rcv_intraLvl[iam];
                    /* if dense separator was detected by one of the
                       previous processors ... */
                    /* (the sender encodes it as
                       maxNmsgsToRcv * filledSep + count) */
                    if (nmsgsTRcv > maxNmsgsToRcv) {
                        VInfo->filledSep = (int) nmsgsTRcv / maxNmsgsToRcv;
                        nmsgsTRcv = nmsgsTRcv % maxNmsgsToRcv;
                    }
                    if (nmsgsTRcv == nmsgsRcvd) {
                        /* all data messages already arrived: the size
                           receive still pending on request[0] is matched
                           by a message sent to iam itself, presumably in
                           place of the MPI_Cancel left commented out */
                        /* MPI_Cancel (&(request[0])); */
                        MPI_Send (&r, 1, mpi_int_t, iam, tag_intraLvl_szMsg,
                                  (*symb_comm));
                        MPI_Wait (&(request[0]), status);
                    }
                }
                if (index_req[0] == 0) {
                    nmsgsRcvd ++;
                    if (nmsgsTRcv == nmsgsRcvd)
                        lstBlkRcvd = TRUE;
                    rcvP = status->MPI_SOURCE;
                    /* allocate enough space to receive data */
                    if (CS->rcv_bufSz < sz_msg) {
                        PS->maxSzBuf += sz_msg - CS->rcv_bufSz;
                        if (CS->rcv_bufSz != 0) /* not first time allocate memory */
                            SUPERLU_FREE (CS->rcv_buf);
                        CS->rcv_bufSz = sz_msg;
                        if (!(CS->rcv_buf = intMalloc_symbfact (sz_msg))) {
                            ABORT("Malloc fails for rcv_buf[].");
                        }
                    }

                    /* use received data to update symbolic factorization */
                    /* L part first; the U part is stored right behind it */
                    lsub_rcvd = CS->rcv_buf;
                    MPI_Recv (lsub_rcvd, sz_msg, mpi_int_t, rcvP,
                              tag_intraLvl_LData, (*symb_comm), status);
                    MPI_Get_count (status, mpi_int_t, &intSzMsg);
                    lsub_rcvd_sz = intSzMsg;
                    usub_rcvd = &(CS->rcv_buf[lsub_rcvd_sz]);
                    MPI_Recv (usub_rcvd, sz_msg - lsub_rcvd_sz, mpi_int_t, rcvP,
                              tag_intraLvl_UData, (*symb_comm), status);
                    MPI_Get_count (status, mpi_int_t, &intSzMsg);
                    usub_rcvd_sz = intSzMsg;
#if ( PRNTlevel>=1 )
                    PS->no_shmRcvd ++;
                    PS->no_msgsRcvd += (float) 2;
                    PS->sz_msgsRcvd += (float) sz_msg;
                    if (PS->maxsz_msgRcvd < lsub_rcvd_sz)
                        PS->maxsz_msgRcvd = lsub_rcvd_sz;
                    if (PS->maxsz_msgRcvd < usub_rcvd_sz)
                        PS->maxsz_msgRcvd = usub_rcvd_sz;
#endif
                    /* the last block received is applied later, after
                       blk_symbfact; mem_error is assigned but not
                       examined in this part of the routine */
                    if (!lstBlkRcvd) {
                        mem_error =
                            rl_update (1, n, iam, lsub_rcvd, lsub_rcvd_sz,
                                       usub_rcvd, usub_rcvd_sz, EMPTY, EMPTY, EMPTY,
                                       fstVtx_blk, lstVtx, nvtcs_blk + nvtcs_toUpd, 1,
                                       p_mark, marker, Pslu_freeable, Llu_symbfact,
                                       VInfo, PS);
                        /* re-read the array pointer after the update */
                        lsub = Llu_symbfact->lsub;
                        mem_error =
                            rl_update (1, n, iam, usub_rcvd, usub_rcvd_sz,
                                       lsub_rcvd, lsub_rcvd_sz, EMPTY, EMPTY, EMPTY,
                                       fstVtx_blk, lstVtx, nvtcs_blk + nvtcs_toUpd, 0,
                                       p_mark, marker, Pslu_freeable, Llu_symbfact,
                                       VInfo, PS);
                        usub = Llu_symbfact->usub;
                    }
                }
            }
        }

        if (VInfo->filledSep) {
            mem_error =
                denseSep_symbfact (1, n, iam, ind_sizes1, ind_sizes2, sizes,
                                   fstVtxSep, szSep, fstP, lstP, fstVtx_blkCyc,
                                   nblk_loc, p_nextl, p_nextu, p_mark,
                                   p_nsuper_loc, marker, ndComm, symb_comm,
                                   Llu_symbfact, Pslu_freeable, VInfo, CS, PS);
        }
        else {
            /* compute symbolic factorization for this block */
            if (!lstBlkRcvd) {
                lsub_rcvd = NULL; usub_rcvd = NULL;
            }
            blk_symbfact (A, iam, lvl,
                          szSep, ind_sizes1, ind_sizes2, sizes, fstVtxSep,
                          fstVtx_loc, fstVtx_blk, lstVtx_blk,
                          lsub_rcvd, lsub_rcvd_sz, usub_rcvd, usub_rcvd_sz,
                          Pslu_freeable, Llu_symbfact, VInfo, CS, PS,
                          marker, p_mark,
                          p_nextl, p_nextu, p_neltsZr, p_neltsTotal,
                          p_nsuper_loc);
            lsub = Llu_symbfact->lsub;
            usub = Llu_symbfact->usub;

            if (lstVtx_blk != lstVtx) {
                /* if this is not the last block of the level */
                if (VInfo->filledSep == FILLED_SEPS ||
                    ( VInfo->filledSep == FILLED_SEP &&
                      ((lstVtx - lstVtx_blk > VInfo->maxSzBlk * nprocsLvl &&
                        nblk_loc > 0) ||
                       (lstVtx - fstVtx_blkCyc > VInfo->maxSzBlk * nprocsLvl &&
                        nblk_loc == 0))))
                    /* if current separator is dense and this is not the
                       last block, then ... */
                    denseSep = TRUE;
                else
                    /* separator dense but not enough uncomputed blocks
                       in the separator to take advantage of it */
                    VInfo->filledSep = FALSE;

                if (VInfo->filledSep == FILLED_SEPS) {
                    /* fold the dense flag into the message counts, see
                       the decoding with / and % above */
                    for (p = fstP; p < lstP; p++)
                        rcv_intraLvl[p] =
                            maxNmsgsToRcv * VInfo->filledSep + rcv_intraLvl[p];
                    denseSep_symbfact (0, n, iam, ind_sizes1, ind_sizes2, sizes,
                                       fstVtxSep, szSep, fstP, lstP,
                                       fstVtx_blkCyc, nblk_loc,
                                       p_nextl, p_nextu, p_mark, p_nsuper_loc,
                                       marker, ndComm, symb_comm,
                                       Llu_symbfact, Pslu_freeable, VInfo, CS, PS);
                }
                else {
                    /* send blk to next procs and update the rest of my
                       own blocks */
                    if (lstBlkRcvd) {
                        mem_error =
                            rl_update (1, n, iam, lsub_rcvd, lsub_rcvd_sz,
                                       usub_rcvd, usub_rcvd_sz, EMPTY, EMPTY, EMPTY,
                                       fstVtx_toUpd, lstVtx, nvtcs_toUpd, 1,
                                       p_mark, marker, Pslu_freeable, Llu_symbfact,
                                       VInfo, PS);
                        lsub = Llu_symbfact->lsub;
                        mem_error =
                            rl_update (1, n, iam, usub_rcvd, usub_rcvd_sz,
                                       lsub_rcvd, lsub_rcvd_sz, EMPTY, EMPTY, EMPTY,
                                       fstVtx_toUpd, lstVtx, nvtcs_toUpd, 0,
                                       p_mark, marker, Pslu_freeable, Llu_symbfact,
                                       VInfo, PS);
                        usub = Llu_symbfact->usub;
                    }

                    upd_myD = FALSE;
                    /* determine processors to which send this block and
                       copy data to be sent */
                    for (p = fstP; p < lstP; p++)
                        snd_intraLvl[p] = FALSE;
                    /* L data is packed from offset 0, U data from offset
                       snd_LintraSz of the same buffer */
                    nextl = 0;
                    nextu = nextl + CS->snd_LintraSz;

                    /* allocate enough space to receive data */
                    /* (rcv_buf doubles as the staging buffer for the
                       outgoing block, hence the sizing by snd_intraSz) */
                    if (CS->rcv_bufSz < CS->snd_intraSz) {
                        PS->maxSzBuf += CS->snd_intraSz - CS->rcv_bufSz;
                        if (CS->rcv_bufSz != 0) /* not first time allocate memory */
                            SUPERLU_FREE (CS->rcv_buf);
                        CS->rcv_bufSz = CS->snd_intraSz;
                        if (!(CS->rcv_buf = intMalloc_symbfact (CS->snd_intraSz))) {
                            ABORT("Malloc fails for rcv_buf[].");
                        }
                    }

                    for (vtx = fstVtx_blk, vtx_lid = fstVtx_blk_lid;
                         vtx < lstVtx_blk; vtx++, vtx_lid ++) {
                        toSend = FALSE;
                        k = xlsub[vtx_lid];
                        prElt_L = lsub[k];
                        j = xusub[vtx_lid];
                        prElt_U = usub[j];
                        if (prElt_L >= lstVtx_blk || prElt_U >= lstVtx_blk) {
                            /* the last vertex of the block ends at the
                               current fill positions *p_nextl / *p_nextu
                               rather than at xlsub/xusub[vtx_lid + 1] */
                            if (vtx == lstVtx_blk - 1) {
                                xlsub_end = *p_nextl;
                                xusub_end = *p_nextu;
                            }
                            else {
                                xlsub_end = xlsub[vtx_lid + 1];
                                xusub_end = xusub[vtx_lid + 1];
                            }
                            if (prElt_L >= lstVtx_blk) {
                                while (lsub[k] <= prElt_L && k < xlsub_end) {
                                    vtx_elt = lsub[k];
                                    if (vtx_elt >= lstVtx_blk && vtx_elt < lstVtx) {
                                        p = OWNER( globToLoc[vtx_elt] );
                                        if (p != iam) {
                                            /* vtx will be send to another processor */
                                            snd_intraLvl[p] = TRUE;
                                            toSend = TRUE;
                                        }
                                        else {
                                            upd_myD = TRUE;
                                        }
                                    }
                                    k++;
                                }
                            }
                            if (prElt_U >= lstVtx_blk) {
                                while (usub[j] <= prElt_U && j < xusub_end) {
                                    vtx_elt = usub[j];
                                    if (vtx_elt >= lstVtx_blk && vtx_elt < lstVtx) {
                                        p = OWNER( globToLoc[vtx_elt] );
                                        if (p != iam) {
                                            /* vtx will be send to another processor */
                                            snd_intraLvl[p] = TRUE;
                                            toSend = TRUE;
                                        }
                                        else {
                                            upd_myD = TRUE;
                                        }
                                    }
                                    j ++;
                                }
                            }
                            if (toSend) {
                                /* L(:, vtx) and U(vtx, :) will be send to processors */
                                /* each packed record is: vertex id at
                                   DIAG_IND, element count at NELTS_IND,
                                   then the subscripts */
                                nelts = xusub_end - xusub[vtx_lid];
                                CS->rcv_buf[nextu + DIAG_IND] = vtx;
                                CS->rcv_buf[nextu + NELTS_IND] = nelts;
                                nextu += 2;
                                for (j = xusub[vtx_lid]; j < xusub_end; j++) {
                                    CS->rcv_buf[nextu] = usub[j];
                                    nextu ++;
                                }
                                nelts = xlsub_end - xlsub[vtx_lid];
                                CS->rcv_buf[nextl + DIAG_IND] = vtx;
                                CS->rcv_buf[nextl + NELTS_IND] = nelts;
                                nextl += 2;
                                for (j = xlsub[vtx_lid]; j < xlsub_end; j++) {
                                    CS->rcv_buf[nextl] = lsub[j];
                                    nextl ++;
                                }
                            }
                        }
                    }

                    /* every destination of this block will receive one
                       more message */
                    for (p = fstP; p < lstP; p++)
                        if (snd_intraLvl[p])
                            rcv_intraLvl[p] ++;
                    if (VInfo->filledSep == FILLED_SEP) {
                        for (p = fstP; p < lstP; p++)
                            rcv_intraLvl[p] =
                                maxNmsgsToRcv * VInfo->filledSep + rcv_intraLvl[p];
                    }
                    else {
                        /* send to the owner of the next block info on
                           no of messages */
                        p = OWNER( globToLoc[lstVtx_blk] );
                        tag = tag_intraLvl + nblk_loc;
                        MPI_Isend (&(rcv_intraLvl[fstP]), nprocsLvl, mpi_int_t, p,
                                   tag, (*symb_comm), request);
#if ( PRNTlevel>=1 )
                        PS->no_shmSnd ++;
#endif
                    }

                    /* there is data to be send */
                    sz_msg = nextl + nextu - CS->snd_LintraSz;
                    for (p = fstP; p < lstP; p++) {
                        if (p != iam && snd_intraLvl[p]) {
                            /* three messages per destination: total size,
                               L data, U data */
                            MPI_Isend (&sz_msg, 1, mpi_int_t, p,
                                       tag_intraLvl_szMsg,
                                       (*symb_comm), &(request[1]));
                            MPI_Isend (CS->rcv_buf, nextl, mpi_int_t, p,
                                       tag_intraLvl_LData,
                                       (*symb_comm), &(request[2]));
                            MPI_Isend (&(CS->rcv_buf[CS->snd_LintraSz]), nextu -
CS->snd_LintraSz, mpi_int_t, p, tag_intraLvl_UData, (*symb_comm), &(request[3])); MPI_Waitall(3, &(request[1]), &(status[1])); #if ( PRNTlevel>=1 ) PS->no_shmSnd ++; PS->no_msgsSnd += (float) 2; PS->sz_msgsSnd += (float) sz_msg; if (PS->maxsz_msgSnd < nextl) PS->maxsz_msgSnd = nextl; if (PS->maxsz_msgSnd < nextu - CS->snd_LintraSz) PS->maxsz_msgSnd = nextu - CS->snd_LintraSz; #endif } } if (VInfo->filledSep != FILLED_SEP) { MPI_Wait (request, status); } /* update rest of vertices */ if (upd_myD) { lsub_rcvd_sz = (*p_nextl) - xlsub[fstVtx_blk_lid]; lsub_rcvd = &(lsub[xlsub[fstVtx_blk_lid]]); usub_rcvd_sz = (*p_nextu) - xusub[fstVtx_blk_lid]; usub_rcvd = &(usub[xusub[fstVtx_blk_lid]]); mem_error = rl_update (0, n, iam, lsub_rcvd, lsub_rcvd_sz, usub_rcvd, usub_rcvd_sz, fstVtx_blk, lstVtx_blk, EMPTY, fstVtx_toUpd, lstVtx, nvtcs_toUpd, 1, p_mark, marker, Pslu_freeable, Llu_symbfact, VInfo, PS); lsub = Llu_symbfact->lsub; lsub_rcvd = &(lsub[xlsub[fstVtx_blk_lid]]); mem_error = rl_update (0, n, iam, usub_rcvd, usub_rcvd_sz, lsub_rcvd, lsub_rcvd_sz, fstVtx_blk, lstVtx_blk, EMPTY, fstVtx_toUpd, lstVtx, nvtcs_toUpd, 0, p_mark, marker, Pslu_freeable, Llu_symbfact, VInfo, PS); usub = Llu_symbfact->usub; } if (VInfo->filledSep == FILLED_SEP) denseSep_symbfact (0, n, iam, ind_sizes1, ind_sizes2, sizes, fstVtxSep, szSep, fstP, lstP, fstVtx_blkCyc, nblk_loc, p_nextl, p_nextu, p_mark, p_nsuper_loc, marker, ndComm, symb_comm, Llu_symbfact, Pslu_freeable, VInfo, CS, PS); } } } VInfo->curblk_loc += 2; nblk_loc ++; } /* update maxNeltsVtx */ VInfo->maxNeltsVtx = maxNeltsVtx_in - lstVtx + fstVtx; /* if current separator dense, then reset value of filledSep */ if (VInfo->filledSep == FILLED_SEP) VInfo->filledSep = FALSE; } static void symbfact_free ( int iam, /* Input - my processor number */ int nprocs, /* Input - number of processors for the symbolic factorization */ Llu_symbfact_t *Llu_symbfact, /* Input/Output - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* 
Input/Output - local info on vertices distribution */ comm_symbfact_t *CS ) { /* free memory corresponding to prune structure */ if (Llu_symbfact->szLsubPr != 0) SUPERLU_FREE( Llu_symbfact->lsubPr ); if (Llu_symbfact->szUsubPr != 0) SUPERLU_FREE( Llu_symbfact->usubPr ); if (Llu_symbfact->xlsubPr != NULL) SUPERLU_FREE( Llu_symbfact->xlsubPr ); if (Llu_symbfact->xusubPr != NULL) SUPERLU_FREE( Llu_symbfact->xusubPr ); if (Llu_symbfact->xlsub_rcvd != NULL) SUPERLU_FREE( Llu_symbfact->xlsub_rcvd); if (Llu_symbfact->xusub_rcvd != NULL) SUPERLU_FREE( Llu_symbfact->xusub_rcvd); if (Llu_symbfact->cntelt_vtcs != NULL) SUPERLU_FREE( Llu_symbfact->cntelt_vtcs); if (Llu_symbfact->cntelt_vtcsA_lvl != NULL) SUPERLU_FREE( Llu_symbfact->cntelt_vtcsA_lvl); if (CS->rcv_bufSz != 0) SUPERLU_FREE( CS->rcv_buf ); if (CS->snd_bufSz != 0) SUPERLU_FREE( CS->snd_buf ); SUPERLU_FREE( VInfo->begEndBlks_loc); SUPERLU_FREE( CS->rcv_interLvl); SUPERLU_FREE( CS->snd_interLvl); SUPERLU_FREE( CS->ptr_rcvBuf); SUPERLU_FREE( CS->rcv_intraLvl); SUPERLU_FREE( CS->snd_intraLvl); SUPERLU_FREE( CS->snd_interSz); SUPERLU_FREE( CS->snd_LinterSz); SUPERLU_FREE( CS->snd_vtxinter); } static void estimate_memUsage ( int_t n, /* Input - order of the matrix */ int iam, /* Input - my processor number */ superlu_dist_mem_usage_t *symb_mem_usage, float *p_totalMemLU, /* Output -memory used for symbolic factorization */ float *p_overestimMem, /* Output -memory allocated during to right looking overestimation memory usage */ Pslu_freeable_t *Pslu_freeable, /* global LU data structures (modified) */ Llu_symbfact_t *Llu_symbfact, /* Input - local L, U data structures */ vtcsInfo_symbfact_t *VInfo, /* Input - local info on vertices distribution */ comm_symbfact_t *CS, psymbfact_stat_t *PS ) { int_t nvtcs_loc, lword, nsuper_loc; float lu_mem, other_mem, overestimMem; nvtcs_loc = VInfo->nvtcs_loc; nsuper_loc = Pslu_freeable->supno_loc[nvtcs_loc]; lword = sizeof(int_t); /* memory for xlsub, xusub, supno_loc, cntelt_vtcs */ 
lu_mem = 4.0 * (float) nvtcs_loc * (float) lword; /* memory for xlsubPr, xusubPr */ lu_mem += 2.0 * (float) VInfo->maxNvtcsNds_loc * (float) lword; if (PS->estimLSz < Llu_symbfact->xlsub[nvtcs_loc]) PS->estimLSz = Llu_symbfact->xlsub[nvtcs_loc]; if (PS->estimUSz < Llu_symbfact->xusub[nvtcs_loc]) PS->estimUSz = Llu_symbfact->xusub[nvtcs_loc]; lu_mem += (float) PS->estimLSz * lword; lu_mem += (float) PS->estimUSz * lword; lu_mem += (float) PS->maxSzLPr * lword; lu_mem += (float) PS->maxSzUPr * lword; lu_mem += (float) PS->szDnsSep * lword; /* memory for globToLoc, tempArray */ lu_mem += (float) 2* (float) n * lword; lu_mem += (float) PS->maxSzBuf * lword; overestimMem = (float) (PS->estimLSz - Llu_symbfact->xlsub[nvtcs_loc]) * lword; overestimMem += (float) (PS->estimUSz - Llu_symbfact->xusub[nvtcs_loc]) * lword; *p_totalMemLU = lu_mem; *p_overestimMem = overestimMem; symb_mem_usage->for_lu = (float) ((3 * nvtcs_loc + 2 * nsuper_loc) * lword); symb_mem_usage->for_lu += (float) (Llu_symbfact->xlsub[nvtcs_loc] * lword); symb_mem_usage->for_lu += (float) (Llu_symbfact->xusub[nvtcs_loc] * lword); symb_mem_usage->total = lu_mem; } static int_t * intMalloc_symbfact(int_t n) { int_t *buf; if (n == 0) buf = NULL; else buf = (int_t *) SUPERLU_MALLOC(n * sizeof(int_t)); return buf; } static int_t * intCalloc_symbfact(int_t n) { int_t *buf; register int_t i; if (n == 0) buf = NULL; else buf = (int_t *) SUPERLU_MALLOC(n * sizeof(int_t)); if ( buf ) for (i = 0; i < n; i++) buf[i] = 0; return (buf); } SuperLU_DIST_5.3.0/SRC/psymbfact.h0000644013363400111340000003017713233431301015434 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief Definitions for parallel symbolic factorization routine * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #ifndef __SUPERLU_DIST_PSYMBFACT /* allow multiple inclusions */ #define __SUPERLU_DIST_PSYMBFACT /* * File name: psymbfact.h * Purpose: Definitions for parallel symbolic factorization routine */ /*! \brief * *
 *-- Structure returned by the symbolic factorization routine
 *
 * Memory is allocated during parallel symbolic factorization
 * symbfact_dist, and freed after dist_symbLU routine.
 *
 * (xlsub,lsub): lsub[*] contains the compressed subscript of
 *	rectangular supernodes; xlsub[j] points to the starting
 *	location of the j-th column in lsub[*]. Note that xlsub 
 *	is indexed by column.
 *	Storage: row subscripts
 *
 * (xusub,usub): usub[*] contains the compressed subscript of
 *	rectangular supernodes; xusub[j] points to the starting
 *	location of the j-th row in usub[*]. Note that xusub 
 *	is indexed by rows.
 *	Storage: column subscripts
 *
 * (xsup_beg_loc,xsup_end_loc, supno_loc) describes mapping between 
 *      supernode and column, information local to each processor:
 *	xsup_beg_loc[s] is the leading column of the local s-th supernode.
 *	xsup_end_loc[s] is the last column of the local s-th supernode.
 *      supno_loc[i] is the supernode no to which column i belongs;
 * 
*/
typedef struct {
    int_t     *xlsub;    /* pointer to the beginning of each column of L */
    int_t     *lsub;     /* compressed L subscripts, stored by columns */
    int_t     szLsub;    /* current max size of lsub */

    int_t     *xusub;    /* pointer to the beginning of each row of U */
    int_t     *usub;     /* compressed U subscripts, stored by rows */
    int_t     szUsub;    /* current max size of usub */

    int_t     *supno_loc;     /* supernode number to which each local
                                 column belongs */
    int_t     *xsup_beg_loc;  /* leading column of each local supernode */
    int_t     *xsup_end_loc;  /* last column of each local supernode */
    int_t     nvtcs_loc;      /* number of local vertices */
    int_t     *globToLoc;     /* global to local indexing; entries are
                                 decoded with OWNER() and LOCAL_IND() */
    int_t     maxNvtcsPProc;  /* max number of vertices on the processors */
} Pslu_freeable_t;


/*! \brief
 *
 *
 *-- The structures are determined by symbfact_dist and not used thereafter.
 *
 * (xlsub,lsub): lsub[*] contains the compressed subscript of L, as described above
 *      for Pslu_freeable_t.  This structure is used internally in symbfact_dist.
 * (xusub,usub): usub[*] contains the compressed subscript of U, as described above
 *      for Pslu_freeable_t.  This structure is used internally in symbfact_dist.
 *
 * (xlsubPr,lsubPr): contains the pruned structure of the graph of
 *      L, stored by rows as a linked list.
 *	xlsubPr[j] points to the starting location of the j-th 
 *      row in lsubPr[*].
 *	Storage: original row subscripts.
 *      It contains the structure corresponding to one node in the sep_tree.
 *      In each independent domain formed by x vertices, xlsubPr is of size x.
 *      Allocated and freed during domain_symbolic.
 *      For the other nodes in the level tree, formed by a maximum of 
 *      maxNvtcsNds_loc, xlsubPr is of size maxNvtcsNds_loc. 
 *      Allocated after domain_symbolic, freed at the end of symbolic_dist
 *      routine.
 * (xusubPr,usubPr): contains the pruned structure of the graph of
 *      U, stored by columns as a linked list.  Similar to (xlsubPr,lsubPr),
 *      except that it is column oriented. 
 *
 * This is allocated during symbolic factorization symbfact_dist.
 * 
*/ typedef struct { int_t *xlsubPr; /* pointer to pruned structure of L */ int_t *lsubPr; /* pruned structure of L */ int_t szLsubPr; /* size of lsubPr array */ int_t indLsubPr; /* current index in lsubPr */ int_t *xusubPr; /* pointer to pruned structure of U */ int_t *usubPr; /* pruned structure of U */ int_t szUsubPr; /* size of usubPr array */ int_t indUsubPr; /* current index in usubPr */ int_t *xlsub_rcvd; int_t *xlsub; /* pointer to structure of L, stored by columns */ int_t *lsub; /* structure of L, stored by columns */ int_t szLsub; /* current max size of lsub */ int_t nextl; /* pointer to current computation in lsub */ int_t *xusub_rcvd; /* */ int_t *xusub; /* pointer to structure of U, stored by rows */ int_t *usub; /* structure of U, stored by rows */ int_t szUsub; /* current max size of usub */ int_t nextu; /* pointer to current computation in usub */ int_t *cntelt_vtcs; /* size of column/row for each vertex */ int_t *cntelt_vtcsA_lvl; /* size of column/row of A for each vertex at the current level */ LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ int_t no_expand; /* Number of memory expansions */ int_t no_expand_pr; /* Number of memory expansions of the pruned structures */ int_t no_expcp; /* Number of memory expansions due to the right looking overestimation approach */ } Llu_symbfact_t; /*! \brief Local information on vertices distribution */ typedef struct { int_t maxSzBlk; /* Max no of vertices in a block */ int_t maxNvtcsNds_loc; /* Max number of vertices of a node distributed on one processor. The maximum is computed among all the nodes of the sep arator tree and among all the processors */ int_t maxNeltsVtx; /* Max number of elements of a vertex, that is condisering that the matrix is dense */ int_t nblks_loc; /* Number of local blocks */ int_t *begEndBlks_loc; /* Begin and end vertex of each local block. 
Array of size 2 * nblks_loc */ int_t curblk_loc; /* Index of current block in the level under computation */ int_t nvtcs_loc; /* Number of local vertices distributed on a processor */ int_t nvtcsLvl_loc; /* Number of local vertices for current level under computation */ int filledSep; /* determines if curent or all separators are filled */ int_t nnz_asup_loc; /* Number of nonzeros in asup not yet consumed. Used during symbolic factorization routine to determine how much of xusub, usub is still used to store the input matrix AS */ int_t nnz_ainf_loc; /* Number of nonzeros in ainf. Similar to nnz_asup_loc. */ int_t xusub_nextLvl; /* Pointer to usub of the next level */ int_t xlsub_nextLvl; /* Pointer to lsub of the next level */ int_t fstVtx_nextLvl; /* First vertex of the next level */ } vtcsInfo_symbfact_t; /*! \brief Structure used for redistributing A for the symbolic factorization algorithm */ typedef struct { int_t *x_ainf; /* pointers to columns of Ainf */ int_t *ind_ainf; /* column indices of Ainf */ int_t *x_asup; /* pointers to rows of Asup */ int_t *ind_asup; /* row indices of Asup */ } matrix_symbfact_t; typedef struct { int_t *rcv_interLvl; /* from which processors iam receives data */ int_t *snd_interLvl; /* to which processors iam sends data */ int_t *snd_interSz; /* size of data to be send */ int_t *snd_LinterSz; /* size of data in L part to be send */ int_t *snd_vtxinter; /* first vertex from where to send data */ /* inter level data structures */ int_t *snd_intraLvl; /* to which processors iam sends data */ int_t snd_intraSz; /* size of data to send */ int_t snd_LintraSz; /* size of data to send */ int_t *rcv_intraLvl; /* from which processors iam receives data */ int_t *rcv_buf; /* buffer to receive data */ int_t rcv_bufSz; /* size of the buffer to receive data */ int_t *snd_buf; /* buffer to send data */ int_t snd_bufSz; /* size of the buffer to send data */ int_t *ptr_rcvBuf; /* pointer to rcv_buf, the buffer to receive data */ } comm_symbfact_t; 
/* relaxation parameters used in the algorithms - for future release */
/*! \brief statistics collected during parallel symbolic factorization */
typedef struct {
  int_t fill_par;     /* Estimation of fill.  It corresponds to sp_ienv_dist(6) */
  float relax_seps;   /* relaxation parameter -not used in this version */
  float relax_curSep; /* relaxation parameter -not used in this version */
  float relax_gen;    /* relaxation parameter -not used in this version */

  /* number of operations performed during parallel symbolic factorization */
  float nops;

  /* no of dense current separators per proc */
  int_t nDnsCurSep;
  /* no of dense separators up per proc */
  int_t nDnsUpSeps;

  /* message statistics: data sent */
  float no_shmSnd;    /* Number of auxiliary messages for send data */
  float no_msgsSnd;   /* Number of messages sending data */
  int_t maxsz_msgSnd; /* Max size of messages sending data */
  float sz_msgsSnd;   /* Average size of messages sending data */

  /* message statistics: data received */
  float no_shmRcvd;   /* Number of auxiliary messages for rcvd data */
  float no_msgsRcvd;  /* Number of messages receiving data */
  int_t maxsz_msgRcvd;/* Max size of messages receiving data */
  float sz_msgsRcvd;  /* Average size of messages receiving data */

  /* message statistics: size estimation / setup traffic */
  float no_msgsCol;   /* Number of messages sent for estimating size
			 of rows/columns, setup information
			 interLvl_symbfact */
  int_t maxsz_msgCol; /* Max size of messages counted in no_msgsCol
			 (same max/average naming as the Snd and Rcvd
			 pairs above) */
  float sz_msgsCol;   /* Average size of messages counted in no_msgsCol */

  /* statistics on fill-in */
  float fill_pelt[6];
  /* 
     0 - average fill per elt added during right-looking factorization 
     1 - max fill per elt added during right-looking factorization 
     2 - number vertices modified during right-looking factorization 
     3 - average fill per elt 
     4 - max fill per elt 
     5 - number vertices computed in upper levels of separator tree
  */

  /* Memory usage */
  int_t estimLSz; /* size of lsub due to right looking overestimation */
  int_t estimUSz; /* size of usub due to right looking overestimation */
  int_t maxSzLPr; /* maximum size of pruned L */
  int_t maxSzUPr; /* maximum size of pruned U */
  int_t maxSzBuf; /* maximum size of the send and receive buffers */
  int_t szDnsSep; /* size of memory used when there are dense separators */
  float allocMem; /* size of the total memory allocated (in bytes) */
} psymbfact_stat_t;

/* MACROS */

/* 
   Macros for computing the owner of a vertex and the local index
   corresponding to a vertex.
   Both expand to an expression that references an identifier named
   maxNvtcsPProc, which must therefore be visible at the point of use.
*/
#define OWNER(x)      ((x) / maxNvtcsPProc)
#define LOCAL_IND(x)  ((x) % maxNvtcsPProc)

/* Macros for computing the available memory in lsub, usub */
#define MEM_LSUB(Llu, VInfo) (Llu->szLsub - VInfo->nnz_ainf_loc)
#define MEM_USUB(Llu, VInfo) (Llu->szUsub - VInfo->nnz_asup_loc)

/* Message tags used between levels (inter) and within a level (intra) */
#define tag_interLvl 2
#define tag_interLvl_LData 0
#define tag_interLvl_UData 1
#define tag_intraLvl_szMsg 1000
#define tag_intraLvl_LData 1001
#define tag_intraLvl_UData 1002
/* tag_intraLvl has to be the last tag number */
#define tag_intraLvl 1003

/* 
 * Index of diagonal element, no of elements preceding each column/row
 * of L/U send to another processor 
 */
#define DIAG_IND 0
#define NELTS_IND 1
#define RCVD_IND 2

/* Return codes */
#define SUCCES_RET 0  /* successful return from a routine */
#define ERROR_RET 1   /* error return code from a routine */
#define FILLED_SEP 2  /* the current separator is dense */
#define FILLED_SEPS 3 /* all the separators situated on the path from the current 
			 separator to the root separator are dense */

/* Code for the type of the memory to expand */
#define USUB_PR 0
#define LSUB_PR 1
/* Sherry: the following are already defined in superlu_enum_const.h
#define USUB 0
#define LSUB 1
*/

/* 
 * Code for the type of computation - right looking (RL_SYMB); left
 * looking (LL_SYMB); symbolic factorization of an independent domain
 * (DOMAIN_SYMB); current separator is dense (DNS_CURSEP); all the
 * separators from the current one to the root of the tree are dense
 * (DNS_UPSEPS).
*/ #define RL_SYMB 0 #define DOMAIN_SYMB 1 #define LL_SYMB 2 #define DNS_UPSEPS 3 #define DNS_CURSEP 4 #endif /* __SUPERLU_DIST_PSYMBFACT */ SuperLU_DIST_5.3.0/SRC/pdgsmv.c0000644013363400111340000003171013233431301014731 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Parallel sparse matrix-vector multiplication * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_ddefs.h" void pdgsmv_init ( SuperMatrix *A, /* Matrix A permuted by columns (input/output). The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. */ int_t *row_to_proc, /* Input. Mapping between rows and processes. */ gridinfo_t *grid, /* Input */ pdgsmv_comm_t *gsmv_comm /* Output. The data structure for communication. */ ) { NRformat_loc *Astore; int iam, p, procs; int *SendCounts, *RecvCounts; int_t i, j, k, l, m, m_loc, n, fst_row, jcol; int_t TotalIndSend, TotalValSend; int_t *colind, *rowptr; int_t *ind_tosend = NULL, *ind_torecv = NULL; int_t *ptr_ind_tosend, *ptr_ind_torecv; int_t *extern_start, *spa, *itemp; double *nzval, *val_tosend = NULL, *val_torecv = NULL, t; MPI_Request *send_req, *recv_req; MPI_Status status; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pdgsmv_init()"); #endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ iam = grid->iam; procs = grid->nprow * grid->npcol; Astore = (NRformat_loc *) A->Store; m = A->nrow; n = A->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; colind = Astore->colind; rowptr = Astore->rowptr; nzval = Astore->nzval; if ( !(SendCounts = SUPERLU_MALLOC(2*procs * sizeof(int))) ) ABORT("Malloc fails for SendCounts[]"); /*for (i = 0; i < 2*procs; ++i) SendCounts[i] = 0;*/ RecvCounts = SendCounts + procs; if ( !(ptr_ind_tosend = intMalloc_dist(2*(procs+1))) ) ABORT("Malloc fails for ptr_ind_tosend[]"); ptr_ind_torecv = ptr_ind_tosend + procs + 1; if ( !(extern_start = intMalloc_dist(m_loc)) ) ABORT("Malloc fails for extern_start[]"); for (i = 0; i < m_loc; ++i) extern_start[i] = rowptr[i]; /* ------------------------------------------------------------ COUNT THE NUMBER OF X ENTRIES TO BE SENT TO EACH PROCESS. THIS IS THE UNION OF THE COLUMN INDICES OF MY ROWS. SWAP TO THE BEGINNING THE PART OF A CORRESPONDING TO THE LOCAL PART OF X. 
THIS ACCOUNTS FOR THE FIRST PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ if ( !(spa = intCalloc_dist(n)) ) /* Aid in global to local translation */ ABORT("Malloc fails for spa[]"); for (p = 0; p < procs; ++p) SendCounts[p] = 0; for (i = 0; i < m_loc; ++i) { /* Loop through each row */ k = extern_start[i]; for (j = rowptr[i]; j < rowptr[i+1]; ++j) {/* Each nonzero in row i */ jcol = colind[j]; p = row_to_proc[jcol]; if ( p != iam ) { /* External */ if ( spa[jcol] == 0 ) { /* First time see this index */ ++SendCounts[p]; spa[jcol] = 1; } } else { /* Swap to beginning the part of A corresponding to the local part of X */ l = colind[k]; t = nzval[k]; colind[k] = jcol; nzval[k] = nzval[j]; colind[j] = l; nzval[j] = t; ++k; } } extern_start[i] = k; } /* ------------------------------------------------------------ LOAD THE X-INDICES TO BE SENT TO THE OTHER PROCESSES. THIS ACCOUNTS FOR THE SECOND PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ /* Build pointers to ind_tosend[]. */ ptr_ind_tosend[0] = 0; for (p = 0, TotalIndSend = 0; p < procs; ++p) { TotalIndSend += SendCounts[p]; /* Total to send. */ ptr_ind_tosend[p+1] = ptr_ind_tosend[p] + SendCounts[p]; } #if 0 ptr_ind_tosend[iam] = 0; /* Local part of X */ #endif if ( TotalIndSend ) { if ( !(ind_tosend = intMalloc_dist(TotalIndSend)) ) ABORT("Malloc fails for ind_tosend[]"); /* Exclude local part of X */ } /* Build SPA to aid global to local translation. 
*/ for (i = 0; i < n; ++i) spa[i] = EMPTY; for (i = 0; i < m_loc; ++i) { /* Loop through each row of A */ for (j = rowptr[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; if ( spa[jcol] == EMPTY ) { /* First time see this index */ p = row_to_proc[jcol]; if ( p == iam ) { /* Local */ /*assert(jcol>=fst_row);*/ spa[jcol] = jcol - fst_row; /* Relative position in local X */ } else { /* External */ ind_tosend[ptr_ind_tosend[p]] = jcol; /* Still global */ spa[jcol] = ptr_ind_tosend[p]; /* Position in ind_tosend[] */ ++ptr_ind_tosend[p]; } } } } /* ------------------------------------------------------------ TRANSFORM THE COLUMN INDICES OF MATRIX A INTO LOCAL INDICES. THIS ACCOUNTS FOR THE THIRD PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ for (i = 0; i < m_loc; ++i) { for (j = rowptr[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; colind[j] = spa[jcol]; } } /* ------------------------------------------------------------ COMMUNICATE THE EXTERNAL INDICES OF X. ------------------------------------------------------------*/ MPI_Alltoall(SendCounts, 1, MPI_INT, RecvCounts, 1, MPI_INT, grid->comm); /* Build pointers to ind_torecv[]. */ ptr_ind_torecv[0] = 0; for (p = 0, TotalValSend = 0; p < procs; ++p) { TotalValSend += RecvCounts[p]; /* Total to receive. 
*/ ptr_ind_torecv[p+1] = ptr_ind_torecv[p] + RecvCounts[p]; } if ( TotalValSend ) { if ( !(ind_torecv = intMalloc_dist(TotalValSend)) ) ABORT("Malloc fails for ind_torecv[]"); } if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*procs *sizeof(MPI_Request)))) ABORT("Malloc fails for recv_req[]."); recv_req = send_req + procs; for (p = 0; p < procs; ++p) { ptr_ind_tosend[p] -= SendCounts[p]; /* Reset pointer to beginning */ if ( SendCounts[p] ) { MPI_Isend(&ind_tosend[ptr_ind_tosend[p]], SendCounts[p], mpi_int_t, p, iam, grid->comm, &send_req[p]); } if ( RecvCounts[p] ) { MPI_Irecv(&ind_torecv[ptr_ind_torecv[p]], RecvCounts[p], mpi_int_t, p, p, grid->comm, &recv_req[p]); } } for (p = 0; p < procs; ++p) { if ( SendCounts[p] ) MPI_Wait(&send_req[p], &status); if ( RecvCounts[p] ) MPI_Wait(&recv_req[p], &status); } /* Allocate storage for the X values to to transferred. */ if ( TotalIndSend && !(val_torecv = doubleMalloc_dist(TotalIndSend)) ) ABORT("Malloc fails for val_torecv[]."); if ( TotalValSend && !(val_tosend = doubleMalloc_dist(TotalValSend)) ) ABORT("Malloc fails for val_tosend[]."); gsmv_comm->extern_start = extern_start; gsmv_comm->ind_tosend = ind_tosend; gsmv_comm->ind_torecv = ind_torecv; gsmv_comm->ptr_ind_tosend = ptr_ind_tosend; gsmv_comm->ptr_ind_torecv = ptr_ind_torecv; gsmv_comm->SendCounts = SendCounts; gsmv_comm->RecvCounts = RecvCounts; gsmv_comm->val_tosend = val_tosend; gsmv_comm->val_torecv = val_torecv; gsmv_comm->TotalIndSend = TotalIndSend; gsmv_comm->TotalValSend = TotalValSend; SUPERLU_FREE(spa); SUPERLU_FREE(send_req); #if ( DEBUGlevel>=2 ) PrintInt10("pdgsmv_init::rowptr", m_loc+1, rowptr); PrintInt10("pdgsmv_init::extern_start", m_loc, extern_start); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgsmv_init()"); #endif } /* PDGSMV_INIT */ /* * Performs sparse matrix-vector multiplication. */ void pdgsmv ( int_t abs, /* Input. Do abs(A)*abs(x). */ SuperMatrix *A_internal, /* Input. Matrix A permuted by columns. 
The column indices are translated into the relative positions in the gathered x-vector. The type of A can be: Stype = NR_loc; Dtype = SLU_D; Mtype = GE. */ gridinfo_t *grid, /* Input */ pdgsmv_comm_t *gsmv_comm, /* Input. The data structure for communication. */ double x[], /* Input. The distributed source vector */ double ax[] /* Output. The distributed destination vector */ ) { NRformat_loc *Astore; int iam, procs; int_t i, j, p, m, m_loc, n, fst_row, jcol; int_t *colind, *rowptr; int *SendCounts, *RecvCounts; int_t *ind_tosend, *ind_torecv, *ptr_ind_tosend, *ptr_ind_torecv; int_t *extern_start, TotalValSend; double *nzval, *val_tosend, *val_torecv; double zero = 0.0; MPI_Request *send_req, *recv_req; MPI_Status status; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pdgsmv()"); #endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ iam = grid->iam; procs = grid->nprow * grid->npcol; Astore = (NRformat_loc *) A_internal->Store; m = A_internal->nrow; n = A_internal->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; colind = Astore->colind; rowptr = Astore->rowptr; nzval = (double *) Astore->nzval; extern_start = gsmv_comm->extern_start; ind_torecv = gsmv_comm->ind_torecv; ptr_ind_tosend = gsmv_comm->ptr_ind_tosend; ptr_ind_torecv = gsmv_comm->ptr_ind_torecv; SendCounts = gsmv_comm->SendCounts; RecvCounts = gsmv_comm->RecvCounts; val_tosend = (double *) gsmv_comm->val_tosend; val_torecv = (double *) gsmv_comm->val_torecv; TotalValSend = gsmv_comm->TotalValSend; /* ------------------------------------------------------------ COPY THE X VALUES INTO THE SEND BUFFER. ------------------------------------------------------------*/ for (i = 0; i < TotalValSend; ++i) { j = ind_torecv[i] - fst_row; /* Relative index in x[] */ val_tosend[i] = x[j]; } /* ------------------------------------------------------------ COMMUNICATE THE X VALUES. 
------------------------------------------------------------*/ if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*procs *sizeof(MPI_Request)))) ABORT("Malloc fails for recv_req[]."); recv_req = send_req + procs; for (p = 0; p < procs; ++p) { if ( RecvCounts[p] ) { MPI_Isend(&val_tosend[ptr_ind_torecv[p]], RecvCounts[p], MPI_DOUBLE, p, iam, grid->comm, &send_req[p]); } if ( SendCounts[p] ) { MPI_Irecv(&val_torecv[ptr_ind_tosend[p]], SendCounts[p], MPI_DOUBLE, p, p, grid->comm, &recv_req[p]); } } /* ------------------------------------------------------------ PERFORM THE ACTUAL MULTIPLICATION. ------------------------------------------------------------*/ if ( abs ) { /* Perform abs(A)*abs(x) */ /* Multiply the local part. */ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ ax[i] = 0.0; for (j = rowptr[i]; j < extern_start[i]; ++j) { jcol = colind[j]; ax[i] += fabs(nzval[j]) * fabs(x[jcol]); } } for (p = 0; p < procs; ++p) { if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status); if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status); } /* Multiply the external part. */ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ for (j = extern_start[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; ax[i] += fabs(nzval[j]) * fabs(val_torecv[jcol]); } } } else { /* Multiply the local part. */ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ ax[i] = zero; for (j = rowptr[i]; j < extern_start[i]; ++j) { jcol = colind[j]; ax[i] += nzval[j] * x[jcol]; } } for (p = 0; p < procs; ++p) { if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status); if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status); } /* Multiply the external part. 
*/ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ for (j = extern_start[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; ax[i] += nzval[j] * val_torecv[jcol]; } } } SUPERLU_FREE(send_req); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgsmv()"); #endif } /* PDGSMV */ void pdgsmv_finalize(pdgsmv_comm_t *gsmv_comm) { int_t *it; double *dt; SUPERLU_FREE(gsmv_comm->extern_start); if ( it = gsmv_comm->ind_tosend ) SUPERLU_FREE(it); if ( it = gsmv_comm->ind_torecv ) SUPERLU_FREE(it); SUPERLU_FREE(gsmv_comm->ptr_ind_tosend); SUPERLU_FREE(gsmv_comm->SendCounts); if ( dt = gsmv_comm->val_tosend ) SUPERLU_FREE(dt); if ( dt = gsmv_comm->val_torecv ) SUPERLU_FREE(dt); } SuperLU_DIST_5.3.0/SRC/machines.h0000644013363400111340000000233013233431301015221 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief These macros define which machine will be used * *
 * -- SuperLU MT routine (version 1.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * August 15, 1997
 *
 * These macros define which machine will be used.
 * 
*/ #ifndef __SUPERLU_MACHINES /* allow multiple inclusions */ #define __SUPERLU_MACHINES #define SGI 0 #define ORIGIN 1 #define DEC 2 #define CRAY_T3E 3 #define SUN 4 #define PTHREAD 5 #define IBM 6 #ifdef _SGI #define MACH SGI #endif #ifdef _ORIGIN #define MACH ORIGIN #endif #ifdef _DEC #define MACH DEC #endif #ifdef _CRAY #define MACH CRAY_T3E #endif #ifdef _SOLARIS #define MACH SUN #endif #ifdef _PTHREAD #define MACH PTHREAD #endif #if ( defined(_SP2) || defined(_SP) ) #define MACH IBM #endif #endif /* __SUPERLU_MACHINES */ SuperLU_DIST_5.3.0/SRC/util.c0000644013363400111340000010044413233431301014407 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Utilities functions * *
 * -- Distributed SuperLU routine (version 5.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * February 1, 2003
 *
 * Modified: March 31, 2013
 *           January 29, 2018
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief Deallocate the structure pointing to the actual storage of the matrix. */ void Destroy_SuperMatrix_Store_dist(SuperMatrix *A) { SUPERLU_FREE ( A->Store ); } void Destroy_CompCol_Matrix_dist(SuperMatrix *A) { NCformat *Astore = A->Store; SUPERLU_FREE( Astore->rowind ); SUPERLU_FREE( Astore->colptr ); if ( Astore->nzval ) SUPERLU_FREE( Astore->nzval ); SUPERLU_FREE( Astore ); } void Destroy_CompRowLoc_Matrix_dist(SuperMatrix *A) { NRformat_loc *Astore = A->Store; SUPERLU_FREE( Astore->rowptr ); SUPERLU_FREE( Astore->colind ); SUPERLU_FREE( Astore->nzval ); SUPERLU_FREE( Astore ); } void Destroy_CompRow_Matrix_dist(SuperMatrix *A) { SUPERLU_FREE( ((NRformat *)A->Store)->rowptr ); SUPERLU_FREE( ((NRformat *)A->Store)->colind ); SUPERLU_FREE( ((NRformat *)A->Store)->nzval ); SUPERLU_FREE( A->Store ); } void Destroy_SuperNode_Matrix_dist(SuperMatrix *A) { SUPERLU_FREE ( ((SCformat *)A->Store)->rowind ); SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr ); SUPERLU_FREE ( ((SCformat *)A->Store)->nzval ); SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr ); SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup ); SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col ); SUPERLU_FREE ( A->Store ); } /*! \brief A is of type Stype==NCP */ void Destroy_CompCol_Permuted_dist(SuperMatrix *A) { SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg ); SUPERLU_FREE ( ((NCPformat *)A->Store)->colend ); SUPERLU_FREE ( A->Store ); } /*! \brief A is of type Stype==DN */ void Destroy_Dense_Matrix_dist(SuperMatrix *A) { DNformat* Astore = A->Store; SUPERLU_FREE (Astore->nzval); SUPERLU_FREE ( A->Store ); } /*! \brief Destroy distributed L & U matrices. 
*/ void Destroy_LU(int_t n, gridinfo_t *grid, LUstruct_t *LUstruct) { int_t i, nb, nsupers; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; #if ( DEBUGlevel>=1 ) int iam; MPI_Comm_rank( MPI_COMM_WORLD, &iam ); CHECK_MALLOC(iam, "Enter Destroy_LU()"); #endif nsupers = Glu_persist->supno[n-1] + 1; nb = CEILING(nsupers, grid->npcol); for (i = 0; i < nb; ++i) if ( Llu->Lrowind_bc_ptr[i] ) { SUPERLU_FREE (Llu->Lrowind_bc_ptr[i]); #ifdef GPU_ACC checkCuda(cudaFreeHost(Llu->Lnzval_bc_ptr[i])); #else SUPERLU_FREE (Llu->Lnzval_bc_ptr[i]); #endif } SUPERLU_FREE (Llu->Lrowind_bc_ptr); SUPERLU_FREE (Llu->Lnzval_bc_ptr); nb = CEILING(nsupers, grid->nprow); for (i = 0; i < nb; ++i) if ( Llu->Ufstnz_br_ptr[i] ) { SUPERLU_FREE (Llu->Ufstnz_br_ptr[i]); SUPERLU_FREE (Llu->Unzval_br_ptr[i]); } SUPERLU_FREE (Llu->Ufstnz_br_ptr); SUPERLU_FREE (Llu->Unzval_br_ptr); /* The following can be freed after factorization. */ SUPERLU_FREE(Llu->ToRecv); SUPERLU_FREE(Llu->ToSendD); SUPERLU_FREE(Llu->ToSendR[0]); SUPERLU_FREE(Llu->ToSendR); /* The following can be freed only after iterative refinement. */ SUPERLU_FREE(Llu->ilsum); SUPERLU_FREE(Llu->fmod); SUPERLU_FREE(Llu->fsendx_plist[0]); SUPERLU_FREE(Llu->fsendx_plist); SUPERLU_FREE(Llu->bmod); SUPERLU_FREE(Llu->bsendx_plist[0]); SUPERLU_FREE(Llu->bsendx_plist); SUPERLU_FREE(Llu->mod_bit); SUPERLU_FREE(Glu_persist->xsup); SUPERLU_FREE(Glu_persist->supno); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit Destroy_LU()"); #endif } /*! \brief Allocate storage in ScalePermstruct */ void ScalePermstructInit(const int_t m, const int_t n, ScalePermstruct_t *ScalePermstruct) { ScalePermstruct->DiagScale = NOEQUIL; if ( !(ScalePermstruct->perm_r = intMalloc_dist(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(ScalePermstruct->perm_c = intMalloc_dist(n)) ) ABORT("Malloc fails for perm_c[]."); } /*! 
\brief Deallocate ScalePermstruct */ void ScalePermstructFree(ScalePermstruct_t *ScalePermstruct) { SUPERLU_FREE(ScalePermstruct->perm_r); SUPERLU_FREE(ScalePermstruct->perm_c); switch ( ScalePermstruct->DiagScale ) { case ROW: SUPERLU_FREE(ScalePermstruct->R); break; case COL: SUPERLU_FREE(ScalePermstruct->C); break; case BOTH: SUPERLU_FREE(ScalePermstruct->R); SUPERLU_FREE(ScalePermstruct->C); break; } } /*! \brief Allocate storage in LUstruct */ void LUstructInit(const int_t n, LUstruct_t *LUstruct) { if ( !(LUstruct->etree = intMalloc_dist(n)) ) ABORT("Malloc fails for etree[]."); if ( !(LUstruct->Glu_persist = (Glu_persist_t *) SUPERLU_MALLOC(sizeof(Glu_persist_t))) ) ABORT("Malloc fails for Glu_persist_t."); if ( !(LUstruct->Llu = (LocalLU_t *) SUPERLU_MALLOC(sizeof(LocalLU_t))) ) ABORT("Malloc fails for LocalLU_t."); } /*! \brief Deallocate LUstruct */ void LUstructFree(LUstruct_t *LUstruct) { #if ( DEBUGlevel>=1 ) int iam; MPI_Comm_rank( MPI_COMM_WORLD, &iam ); CHECK_MALLOC(iam, "Enter LUstructFree()"); #endif SUPERLU_FREE(LUstruct->etree); SUPERLU_FREE(LUstruct->Glu_persist); SUPERLU_FREE(LUstruct->Llu); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit LUstructFree()"); #endif } /*! \brief * *
 * Count the total number of nonzeros in factors L and U,  and in the 
 * symmetrically reduced L. 
 * 
*/
/*
 * n            (input)  order of the matrix
 * xprune       (input)  end of the pruned part of each column's subscripts
 * nnzL, nnzU   (output) number of nonzeros counted in L and in U
 * Glu_persist  (input)  supplies xsup[] and supno[]; supno[n] is read as
 *                       the index of the last supernode
 * Glu_freeable (input)  supplies xlsub[], xusub[] and usub[]
 *
 * For n <= 0 both outputs are set to 0 and no array is accessed.
 */
void
countnz_dist(const int_t n, int_t *xprune,
             long long int *nnzL, long long int *nnzU,
             Glu_persist_t *Glu_persist, Glu_freeable_t *Glu_freeable)
{
    int_t  fnz, fsupc, i, j, nsuper;
    int_t  jlen, irep;
    long long int nnzL0;
    int_t  *supno, *xsup, *xlsub, *xusub, *usub;

    *nnzL  = 0;
    *nnzU  = 0;

    /* Bail out before touching supno[n]: for n <= 0 that element is
       either meaningless or out of bounds. */
    if ( n <= 0 ) return;

    supno  = Glu_persist->supno;
    xsup   = Glu_persist->xsup;
    xlsub  = Glu_freeable->xlsub;
    xusub  = Glu_freeable->xusub;
    usub   = Glu_freeable->usub;
    nnzL0  = 0;
    nsuper = supno[n];

    /*
     * For each supernode in L.
     */
    for (i = 0; i <= nsuper; i++) {
        fsupc = xsup[i];
        jlen = xlsub[fsupc+1] - xlsub[fsupc];

        /* Column j of the supernode holds jlen entries of L and
           (j - fsupc + 1) entries of the dense upper triangle. */
        for (j = fsupc; j < xsup[i+1]; j++) {
            *nnzL += jlen;
            *nnzU += j - fsupc + 1;
            jlen--;
        }
        irep = xsup[i+1] - 1;
        nnzL0 += xprune[irep] - xlsub[irep];
    }

    /* nnzL0 (nonzeros in the symmetrically reduced L) is only used by
       this disabled diagnostic: */
    /* printf("\tNo of nonzeros in symm-reduced L = %ld\n", nnzL0);*/

    /* For each column in U. */
    for (j = 0; j < n; ++j) {
        for (i = xusub[j]; i < xusub[j+1]; ++i) {
            fnz = usub[i];
            fsupc = xsup[supno[fnz]+1];
            *nnzU += fsupc - fnz;
        }
    }
}

/*! \brief
 *
 *
 * Fix up the data storage lsub for L-subscripts. It removes the subscript
 * sets for structural pruning, and applies the permutation to the remaining
 * subscripts.
 * 
*/ long long int fixupL_dist(const int_t n, const int_t *perm_r, Glu_persist_t *Glu_persist, Glu_freeable_t *Glu_freeable) { register int_t nsuper, fsupc, nextl, i, j, k, jstrt; register long long int lsub_size; int_t *xsup, *lsub, *xlsub; if ( n <= 1 ) return 0; xsup = Glu_persist->xsup; lsub = Glu_freeable->lsub; xlsub = Glu_freeable->xlsub; nextl = 0; nsuper = (Glu_persist->supno)[n]; lsub_size = xlsub[n]; /* * For each supernode ... */ for (i = 0; i <= nsuper; i++) { fsupc = xsup[i]; jstrt = xlsub[fsupc]; xlsub[fsupc] = nextl; for (j = jstrt; j < xlsub[fsupc+1]; j++) { lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */ nextl++; } for (k = fsupc+1; k < xsup[i+1]; k++) xlsub[k] = nextl; /* Other columns in supernode i */ } xlsub[n] = nextl; return lsub_size; } /*! \brief Set the default values for the options argument. */ void set_default_options_dist(superlu_dist_options_t *options) { options->Fact = DOFACT; options->Equil = YES; options->ParSymbFact = NO; #ifdef HAVE_PARMETIS options->ColPerm = METIS_AT_PLUS_A; #else options->ColPerm = MMD_AT_PLUS_A; #endif options->RowPerm = LargeDiag; options->ReplaceTinyPivot = NO; options->IterRefine = SLU_DOUBLE; options->Trans = NOTRANS; options->SolveInitialized = NO; options->RefineInitialized = NO; options->PrintStat = YES; options->num_lookaheads = 10; options->lookahead_etree = NO; options->SymPattern = NO; } /*! \brief Print the options setting. */ void print_options_dist(superlu_dist_options_t *options) { if ( options->PrintStat == NO ) return; printf("**************************************************\n"); printf(".. 
options:\n"); printf("** Fact : %4d\n", options->Fact); printf("** Equil : %4d\n", options->Equil); printf("** ParSymbFact : %4d\n", options->ParSymbFact); printf("** ColPerm : %4d\n", options->ColPerm); printf("** RowPerm : %4d\n", options->RowPerm); printf("** ReplaceTinyPivot : %4d\n", options->ReplaceTinyPivot); printf("** IterRefine : %4d\n", options->IterRefine); printf("** Trans : %4d\n", options->Trans); printf("** num_lookaheads : %4d\n", options->num_lookaheads); printf("** SymPattern : %4d\n", options->SymPattern); printf("** lookahead_etree : %4d\n", options->lookahead_etree); printf("**************************************************\n"); } /*! \brief Print the blocking parameters. */ void print_sp_ienv_dist(superlu_dist_options_t *options) { if ( options->PrintStat == NO ) return; printf("**************************************************\n"); printf(".. blocking parameters from sp_ienv():\n"); printf("** relaxation : " IFMT "\n", sp_ienv_dist(2)); printf("** max supernode : " IFMT "\n", sp_ienv_dist(3)); printf("** estimated fill ratio : " IFMT "\n", sp_ienv_dist(6)); printf("** min GEMM dimension for GPU : " IFMT "\n", sp_ienv_dist(7)); printf("**************************************************\n"); } /*! \brief * *
 * Purpose
 * =======
 *   Set up the communication pattern for redistribution between B and X
 *   in the triangular solution.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The dimension of the linear system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of the distributed input matrix.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * fst_row (input) int (global)
 *        The row number of matrix B's first row in the global matrix.
 *
 * perm_r (input) int* (global)
 *        The row permutation vector.
 *
 * perm_c (input) int* (global)
 *        The column permutation vector.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 * 
*/ int_t pxgstrs_init(int_t n, int_t m_loc, int_t nrhs, int_t fst_row, int_t perm_r[], int_t perm_c[], gridinfo_t *grid, Glu_persist_t *Glu_persist, SOLVEstruct_t *SOLVEstruct) { int *SendCnt, *SendCnt_nrhs, *RecvCnt, *RecvCnt_nrhs; int *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs; int *itemp, *ptr_to_ibuf, *ptr_to_dbuf; int_t *row_to_proc; int_t i, gbi, k, l, num_diag_procs, *diag_procs; int_t irow, q, knsupc, nsupers, *xsup, *supno; int iam, p, pkk, procs; pxgstrs_comm_t *gstrs_comm; procs = grid->nprow * grid->npcol; iam = grid->iam; gstrs_comm = SOLVEstruct->gstrs_comm; xsup = Glu_persist->xsup; supno = Glu_persist->supno; nsupers = Glu_persist->supno[n-1] + 1; row_to_proc = SOLVEstruct->row_to_proc; /* ------------------------------------------------------------ SET UP COMMUNICATION PATTERN FOR ReDistribute_B_to_X. ------------------------------------------------------------*/ if ( !(itemp = SUPERLU_MALLOC(8*procs * sizeof(int))) ) ABORT("Malloc fails for B_to_X_itemp[]."); SendCnt = itemp; SendCnt_nrhs = itemp + procs; RecvCnt = itemp + 2*procs; RecvCnt_nrhs = itemp + 3*procs; sdispls = itemp + 4*procs; sdispls_nrhs = itemp + 5*procs; rdispls = itemp + 6*procs; rdispls_nrhs = itemp + 7*procs; /* Count the number of elements to be sent to each diagonal process.*/ for (p = 0; p < procs; ++p) SendCnt[p] = 0; for (i = 0, l = fst_row; i < m_loc; ++i, ++l) { irow = perm_c[perm_r[l]]; /* Row number in Pc*Pr*B */ gbi = BlockNum( irow ); p = PNUM( PROW(gbi,grid), PCOL(gbi,grid), grid ); /* Diagonal process */ ++SendCnt[p]; } /* Set up the displacements for alltoall. 
*/ MPI_Alltoall(SendCnt, 1, MPI_INT, RecvCnt, 1, MPI_INT, grid->comm); sdispls[0] = rdispls[0] = 0; for (p = 1; p < procs; ++p) { sdispls[p] = sdispls[p-1] + SendCnt[p-1]; rdispls[p] = rdispls[p-1] + RecvCnt[p-1]; } for (p = 0; p < procs; ++p) { SendCnt_nrhs[p] = SendCnt[p] * nrhs; sdispls_nrhs[p] = sdispls[p] * nrhs; RecvCnt_nrhs[p] = RecvCnt[p] * nrhs; rdispls_nrhs[p] = rdispls[p] * nrhs; } /* This is saved for repeated solves, and is freed in pxgstrs_finalize().*/ gstrs_comm->B_to_X_SendCnt = SendCnt; /* ------------------------------------------------------------ SET UP COMMUNICATION PATTERN FOR ReDistribute_X_to_B. ------------------------------------------------------------*/ /* This is freed in pxgstrs_finalize(). */ if ( !(itemp = SUPERLU_MALLOC(8*procs * sizeof(int))) ) ABORT("Malloc fails for X_to_B_itemp[]."); SendCnt = itemp; SendCnt_nrhs = itemp + procs; RecvCnt = itemp + 2*procs; RecvCnt_nrhs = itemp + 3*procs; sdispls = itemp + 4*procs; sdispls_nrhs = itemp + 5*procs; rdispls = itemp + 6*procs; rdispls_nrhs = itemp + 7*procs; /* Count the number of X entries to be sent to each process.*/ for (p = 0; p < procs; ++p) SendCnt[p] = 0; num_diag_procs = SOLVEstruct->num_diag_procs; diag_procs = SOLVEstruct->diag_procs; for (p = 0; p < num_diag_procs; ++p) { /* for all diagonal processes */ pkk = diag_procs[p]; if ( iam == pkk ) { for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); irow = FstBlockC( k ); for (i = 0; i < knsupc; ++i) { #if 0 q = row_to_proc[inv_perm_c[irow]]; #else q = row_to_proc[irow]; #endif ++SendCnt[q]; ++irow; } } } } MPI_Alltoall(SendCnt, 1, MPI_INT, RecvCnt, 1, MPI_INT, grid->comm); sdispls[0] = rdispls[0] = 0; sdispls_nrhs[0] = rdispls_nrhs[0] = 0; SendCnt_nrhs[0] = SendCnt[0] * nrhs; RecvCnt_nrhs[0] = RecvCnt[0] * nrhs; for (p = 1; p < procs; ++p) { sdispls[p] = sdispls[p-1] + SendCnt[p-1]; rdispls[p] = rdispls[p-1] + RecvCnt[p-1]; sdispls_nrhs[p] = sdispls[p] * nrhs; rdispls_nrhs[p] = rdispls[p] * nrhs; 
SendCnt_nrhs[p] = SendCnt[p] * nrhs; RecvCnt_nrhs[p] = RecvCnt[p] * nrhs; } /* This is saved for repeated solves, and is freed in pxgstrs_finalize().*/ gstrs_comm->X_to_B_SendCnt = SendCnt; if ( !(ptr_to_ibuf = SUPERLU_MALLOC(2*procs * sizeof(int))) ) ABORT("Malloc fails for ptr_to_ibuf[]."); gstrs_comm->ptr_to_ibuf = ptr_to_ibuf; gstrs_comm->ptr_to_dbuf = ptr_to_ibuf + procs; return 0; } /* PXGSTRS_INIT */ void pxgstrs_finalize(pxgstrs_comm_t *gstrs_comm) { SUPERLU_FREE(gstrs_comm->B_to_X_SendCnt); SUPERLU_FREE(gstrs_comm->X_to_B_SendCnt); SUPERLU_FREE(gstrs_comm->ptr_to_ibuf); SUPERLU_FREE(gstrs_comm); } /*! \brief Diagnostic print of segment info after panel_dfs(). */ void print_panel_seg_dist(int_t n, int_t w, int_t jcol, int_t nseg, int_t *segrep, int_t *repfnz) { int_t j, k; for (j = jcol; j < jcol+w; j++) { printf("\tcol " IFMT ":\n", j); for (k = 0; k < nseg; k++) printf("\t\tseg " IFMT ", segrep " IFMT ", repfnz " IFMT "\n", k, segrep[k], repfnz[(j-jcol)*n + segrep[k]]); } } void PStatInit(SuperLUStat_t *stat) { register int_t i; if ( !(stat->utime = SUPERLU_MALLOC(NPHASES*sizeof(double))) ) ABORT("Malloc fails for stat->utime[]"); if ( !(stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t))) ) ABORT("SUPERLU_MALLOC fails for stat->ops[]"); for (i = 0; i < NPHASES; ++i) { stat->utime[i] = 0.; stat->ops[i] = 0.; } stat->TinyPivots = stat->RefineSteps = 0; } void PStatPrint(superlu_dist_options_t *options, SuperLUStat_t *stat, gridinfo_t *grid) { double *utime = stat->utime; flops_t *ops = stat->ops; int_t iam = grid->iam; flops_t flopcnt, factflop, solveflop; if ( options->PrintStat == NO ) return; if ( !iam && options->Fact != FACTORED ) { printf("**************************************************\n"); printf("**** Time (seconds) ****\n"); if ( options->Equil != NO ) printf("\tEQUIL time %8.2f\n", utime[EQUIL]); if ( options->RowPerm != NOROWPERM ) printf("\tROWPERM time %8.2f\n", utime[ROWPERM]); if ( options->ColPerm != NATURAL ) 
printf("\tCOLPERM time %8.2f\n", utime[COLPERM]); printf("\tSYMBFACT time %8.2f\n", utime[SYMBFAC]); printf("\tDISTRIBUTE time %8.2f\n", utime[DIST]); } MPI_Reduce(&ops[FACT], &flopcnt, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm); factflop = flopcnt; if ( !iam && options->Fact != FACTORED ) { printf("\tFACTOR time %8.2f\n", utime[FACT]); if ( utime[FACT] != 0.0 ) printf("\tFactor flops\t%e\tMflops \t%8.2f\n", flopcnt, flopcnt*1e-6/utime[FACT]); } MPI_Reduce(&ops[SOLVE], &flopcnt, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm); solveflop = flopcnt; if ( !iam ) { printf("\tSOLVE time %8.2f\n", utime[SOLVE]); if ( utime[SOLVE] != 0.0 ) printf("\tSolve flops\t%e\tMflops \t%8.2f\n", flopcnt, flopcnt*1e-6/utime[SOLVE]); if ( options->IterRefine != NOREFINE ) { printf("\tREFINEMENT time %8.2f\tSteps%8d\n\n", utime[REFINE], stat->RefineSteps); } printf("**************************************************\n"); } #if ( PROFlevel>=1 ) fflush(stdout); MPI_Barrier( grid->comm ); { int_t i, P = grid->nprow*grid->npcol; flops_t b, maxflop; if ( !iam ) printf("\n.. FACT time breakdown:\tcomm\ttotal\n"); for (i = 0; i < P; ++i) { if ( iam == i) { printf("\t\t(%d)%8.2f%8.2f\n", iam, utime[COMM], utime[FACT]); fflush(stdout); } MPI_Barrier( grid->comm ); } if ( !iam ) printf("\n.. FACT ops distribution:\n"); for (i = 0; i < P; ++i) { if ( iam == i ) { printf("\t\t(%d)\t%e\n", iam, ops[FACT]); fflush(stdout); } MPI_Barrier( grid->comm ); } MPI_Reduce(&ops[FACT], &maxflop, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm); if ( !iam ) { b = factflop/P/maxflop; printf("\tFACT load balance: %.2f\n", b); } if ( !iam ) printf("\n.. SOLVE ops distribution:\n"); for (i = 0; i < P; ++i) { if ( iam == i ) { printf("\t\t%d\t%e\n", iam, ops[SOLVE]); fflush(stdout); } MPI_Barrier( grid->comm ); } MPI_Reduce(&ops[SOLVE], &maxflop, 1, MPI_FLOAT, MPI_MAX, 0,grid->comm); if ( !iam ) { b = solveflop/P/maxflop; printf("\tSOLVE load balance: %.2f\n", b); } } #endif /* if ( !iam ) fflush(stdout); CRASH THE SYSTEM pierre. 
*/ } void PStatFree(SuperLUStat_t *stat) { SUPERLU_FREE(stat->utime); SUPERLU_FREE(stat->ops); } /*! \brief Fills an integer array with a given value. */ void ifill_dist(int_t *a, int_t alen, int_t ival) { register int_t i; for (i = 0; i < alen; i++) a[i] = ival; } void get_diag_procs(int_t n, Glu_persist_t *Glu_persist, gridinfo_t *grid, int_t *num_diag_procs, int_t **diag_procs, int_t **diag_len) { int_t i, j, k, knsupc, nprow, npcol, nsupers, pkk; int_t *xsup; i = j = *num_diag_procs = pkk = 0; nprow = grid->nprow; npcol = grid->npcol; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; do { ++(*num_diag_procs); i = (++i) % nprow; j = (++j) % npcol; pkk = PNUM( i, j, grid ); } while ( pkk != 0 ); /* Until wrap back to process 0 */ if ( !(*diag_procs = intMalloc_dist(*num_diag_procs)) ) ABORT("Malloc fails for diag_procs[]"); if ( !(*diag_len = intCalloc_dist(*num_diag_procs)) ) ABORT("Calloc fails for diag_len[]"); for (i = j = k = 0; k < *num_diag_procs; ++k) { pkk = PNUM( i, j, grid ); (*diag_procs)[k] = pkk; i = (++i) % nprow; j = (++j) % npcol; } for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); i = k % *num_diag_procs; (*diag_len)[i] += knsupc; } } /*! 
\brief Get the statistics of the supernodes
 *
 * Prints to stdout the number of supernodes, the widest supernode, the
 * number of single-column supernodes, and an NBUCKS-bin histogram of the
 * supernode widths.  Supernode s has width xsup[s+1] - xsup[s], for
 * s = 0 .. nsuper.
 */
#define NBUCKS 10
static int_t max_sup_size;

void super_stats_dist(int_t nsuper, int_t *xsup)
{
    int_t bucket[NBUCKS];
    int_t singletons = 0;
    int_t s, b, width, slot, lo, hi;

    /* First pass: widest supernode and count of width-1 supernodes. */
    max_sup_size = 0;
    for (s = 0; s <= nsuper; s++) {
        width = xsup[s+1] - xsup[s];
        if ( width == 1 ) singletons++;
        max_sup_size = ( max_sup_size < width ) ? width : max_sup_size;
    }

    printf(" Supernode statistics:\n\tno of super = " IFMT "\n", nsuper+1);
    printf("\tmax supernode size = " IFMT "\n", max_sup_size);
    printf("\tno of size 1 supernodes = " IFMT "\n", singletons);

    /* Second pass: histogram of the widths, scaled by the maximum. */
    for (b = 0; b < NBUCKS; b++) bucket[b] = 0;

    for (s = 0; s <= nsuper; s++) {
        width = xsup[s+1] - xsup[s];
        slot = (float) width / max_sup_size * NBUCKS;
        if ( slot >= NBUCKS ) slot = NBUCKS - 1;  /* widest one lands in the last bin */
        bucket[slot]++;
    }

    printf("\tHistogram of supernode sizes:\n");
    for (b = 0; b < NBUCKS; b++) {
        lo = (float) b * max_sup_size / NBUCKS;
        hi = (float) (b+1) * max_sup_size / NBUCKS;
        printf("\tsnode: " IFMT "-" IFMT "\t\t" IFMT "\n", lo+1, hi, bucket[b]);
    }
}

/*! \brief Check whether repfnz[] == EMPTY after reset.
*/ void check_repfnz_dist(int_t n, int_t w, int_t jcol, int_t *repfnz) { int_t jj, k; for (jj = jcol; jj < jcol+w; jj++) for (k = 0; k < n; k++) if ( repfnz[(jj-jcol)*n + k] != EMPTY ) { fprintf(stderr, "col " IFMT ", repfnz_col[" IFMT "] = " IFMT "\n", jj, k, repfnz[(jj-jcol)*n + k]); ABORT("check_repfnz_dist"); } } void PrintInt10(char *name, int_t len, int_t *x) { register int_t i; printf("%10s:", name); for (i = 0; i < len; ++i) { if ( i % 10 == 0 ) printf("\n\t[" IFMT "-" IFMT "]", i, i+9); printf(IFMT, x[i]); } printf("\n"); } void PrintInt32(char *name, int len, int *x) { register int i; printf("%10s:", name); for (i = 0; i < len; ++i) { if ( i % 10 == 0 ) printf("\n\t[%2d-%2d]", i, i+9); printf("%6d", x[i]); } printf("\n"); } int file_PrintInt10(FILE *fp, char *name, int_t len, int_t *x) { register int_t i; fprintf(fp, "%10s:", name); for (i = 0; i < len; ++i) { if ( i % 10 == 0 ) fprintf(fp, "\n\t[" IFMT "-" IFMT "]", i, i+9); fprintf(fp, IFMT, x[i]); } fprintf(fp, "\n"); return 0; } int file_PrintInt32(FILE *fp, char *name, int len, int *x) { register int i; fprintf(fp, "%10s:", name); for (i = 0; i < len; ++i) { if ( i % 10 == 0 ) fprintf(fp, "\n\t[%2d-%2d]", i, i+9); fprintf(fp, "%6d", x[i]); } fprintf(fp, "\n"); return 0; } int_t CheckZeroDiagonal(int_t n, int_t *rowind, int_t *colbeg, int_t *colcnt) { register int_t i, j, zd, numzd = 0; for (j = 0; j < n; ++j) { zd = 0; for (i = colbeg[j]; i < colbeg[j]+colcnt[j]; ++i) { /*if ( iperm[rowind[i]] == j ) zd = 1;*/ if ( rowind[i] == j ) { zd = 1; break; } } if ( zd == 0 ) { #if ( PRNTlevel>=2 ) printf(".. Diagonal of column %d is zero.\n", j); #endif ++numzd; } } return numzd; } /* --------------------------------------------------------------------------- */ void isort(int_t N, int_t *ARRAY1, int_t *ARRAY2) { /* * Purpose * ======= * Use quick sort algorithm to sort ARRAY1 and ARRAY2 in the increasing * order of ARRAY1. 
* * Arguments * ========= * N (input) INTEGER * On entry, specifies the size of the arrays. * * ARRAY1 (input/output) DOUBLE PRECISION ARRAY of LENGTH N * On entry, contains the array to be sorted. * On exit, contains the sorted array. * * ARRAY2 (input/output) DOUBLE PRECISION ARRAY of LENGTH N * On entry, contains the array to be sorted. * On exit, contains the sorted array. */ int_t IGAP, I, J; int_t TEMP; IGAP = N / 2; while (IGAP > 0) { for (I = IGAP; I < N; I++) { J = I - IGAP; while (J >= 0) { if (ARRAY1[J] > ARRAY1[J + IGAP]) { TEMP = ARRAY1[J]; ARRAY1[J] = ARRAY1[J + IGAP]; ARRAY1[J + IGAP] = TEMP; TEMP = ARRAY2[J]; ARRAY2[J] = ARRAY2[J + IGAP]; ARRAY2[J + IGAP] = TEMP; J = J - IGAP; } else { break; } } } IGAP = IGAP / 2; } } void isort1(int_t N, int_t *ARRAY) { /* * Purpose * ======= * Use quick sort algorithm to sort ARRAY in increasing order. * * Arguments * ========= * N (input) INTEGER * On entry, specifies the size of the arrays. * * ARRAY (input/output) DOUBLE PRECISION ARRAY of LENGTH N * On entry, contains the array to be sorted. * On exit, contains the sorted array. 
* */ int_t IGAP, I, J; int_t TEMP; IGAP = N / 2; while (IGAP > 0) { for (I = IGAP; I < N; I++) { J = I - IGAP; while (J >= 0) { if (ARRAY[J] > ARRAY[J + IGAP]) { TEMP = ARRAY[J]; ARRAY[J] = ARRAY[J + IGAP]; ARRAY[J + IGAP] = TEMP; J = J - IGAP; } else { break; } } } IGAP = IGAP / 2; } } void log_memory(long long cur_bytes, SuperLUStat_t *stat) { stat->current_buffer += (float) cur_bytes; if (cur_bytes > 0) { stat->peak_buffer = SUPERLU_MAX(stat->peak_buffer, stat->current_buffer); } } void print_memorylog(SuperLUStat_t *stat, char *msg) { printf("__ %s (MB):\n\tcurrent_buffer : %8.2f\tpeak_buffer : %8.2f\n", msg, stat->current_buffer, stat->peak_buffer); } int compare_pair (const void *a, const void *b) { return (((struct superlu_pair *) a)->val - ((struct superlu_pair *) b)->val); } int get_thread_per_process() { char* ttemp; ttemp = getenv("THREAD_PER_PROCESS"); if(ttemp) return atoi(ttemp); else return 1; } int_t get_max_buffer_size () { char *ttemp; ttemp = getenv ("MAX_BUFFER_SIZE"); if (ttemp) return atoi (ttemp); else return 5000000; } int_t get_cublas_nb () { char *ttemp; ttemp = getenv ("CUBLAS_NB"); if (ttemp) return atoi (ttemp); else return 64; } int_t get_num_cuda_streams () { char *ttemp; ttemp = getenv ("NUM_CUDA_STREAMS"); if (ttemp) return atoi (ttemp); else return 8; } int_t get_min (int_t * sums, int_t nprocs) { int_t min_ind, min_val; min_ind = 0; min_val = 2147483647; for (int i = 0; i < nprocs; i++) { if (sums[i] < min_val) { min_val = sums[i]; min_ind = i; } } return min_ind; } int_t static_partition (struct superlu_pair *work_load, int_t nwl, int_t *partition, int_t ldp, int_t * sums, int_t * counts, int nprocs) { //initialization loop for (int i = 0; i < nprocs; ++i) { counts[i] = 0; sums[i] = 0; } qsort (work_load, nwl, sizeof (struct superlu_pair), compare_pair); // for(int i=0;i= 0; i--) { int_t ind = get_min (sums, nprocs); // printf("ind %d\n",ind ); partition[ldp * ind + counts[ind]] = work_load[i].ind; counts[ind]++; sums[ind] += 
work_load[i].val; } return 0; } /* * Search for the metadata of the j-th block in a U panel. */ void arrive_at_ublock (int_t j, /* j-th block in a U panel */ int_t * iukp, /* output : point to index[] of j-th block */ int_t * rukp, /* output : point to nzval[] of j-th block */ int_t * jb, /* Global block number of block U(k,j). */ int_t * ljb, /* Local block number of U(k,j). */ int_t * nsupc,/* supernode size of destination block */ int_t iukp0, /* input : search starting point */ int_t rukp0, int_t * usub, /* U subscripts */ int_t * perm_u, /* permutation vector from static schedule */ int_t * xsup, /* for SuperSize and LBj */ gridinfo_t * grid) { int_t jj; *iukp = iukp0; /* point to the first block in index[] */ *rukp = rukp0; /* point to the start of nzval[] */ /* Sherry -- why always starts from 0 ?? Can continue at the column left from last search. */ /* Caveat: There is a permutation perm_u involved for j. That's why the search need to restart from 0. */ #ifdef ISORT for (jj = 0; jj < perm_u[j]; jj++) /* perm_u[j] == j */ #else for (jj = 0; jj < perm_u[2 * j + 1]; jj++) /* == j */ #endif { /* Reinitilize the pointers to the beginning of the * k-th column/row of L/U factors. * usub[] - index array for panel U(k,:) */ // printf("iukp %d \n",*iukp ); *jb = usub[*iukp]; /* Global block number of block U(k,j). */ // printf("jb %d \n",*jb ); *nsupc = SuperSize (*jb); // printf("nsupc %d \n",*nsupc ); *iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ *rukp += usub[*iukp - 1]; /* Jump # of nonzeros in block U(k,jj); Move to block U(k,jj+1) in nzval[] */ *iukp += *nsupc; } /* Set the pointers to the beginning of U block U(k,j) */ *jb = usub[*iukp]; /* Global block number of block U(k,j). */ *ljb = LBj (*jb, grid); /* Local block number of U(k,j). */ *nsupc = SuperSize (*jb); *iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ } /* * Count the maximum size of U(k,:) across all the MPI processes. 
* September 28, 2016 */ static int_t num_full_cols_U ( int_t kk, int_t **Ufstnz_br_ptr, int_t *xsup, gridinfo_t *grid, int_t *perm_u, int_t *ldu /* max. size of nonzero columns in U(kk,:) */ ) { int_t lk = LBi (kk, grid); int_t *usub = Ufstnz_br_ptr[lk]; if (usub == NULL) return 0; /* code */ int_t iukp = BR_HEADER; /* Skip header; Pointer to index[] of U(k,:) */ int_t rukp = 0; /* Pointer to nzval[] of U(k,:) */ int_t nub = usub[0]; /* Number of blocks in the block row U(k,:) */ int_t klst = FstBlockC (kk + 1); int_t iukp0 = iukp; int_t rukp0 = rukp; int_t jb,ljb; int_t nsupc; int_t full = 1; int_t full_Phi = 1; int_t temp_ncols = 0; int_t segsize; for (int_t j = 0; j < nub; ++j) { /* Sherry -- no need to search from beginning ?? */ arrive_at_ublock( j, &iukp, &rukp, &jb, &ljb, &nsupc, iukp0, rukp0, usub, perm_u, xsup, grid ); for (int_t jj = iukp; jj < iukp + nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) ++temp_ncols; if ( segsize > *ldu ) *ldu = segsize; } } return temp_ncols; } int_t estimate_bigu_size(int_t nsupers, int_t ldt, /* Largest segment of all U(k,:) columns */ int_t**Ufstnz_br_ptr, /* point to U index[] array */ Glu_persist_t *Glu_persist, gridinfo_t* grid, int_t* perm_u) { int_t iam = grid->iam; int_t Pc = grid->npcol; int_t Pr = grid->nprow; int_t myrow = MYROW (iam, grid); int_t mycol = MYCOL (iam, grid); int_t* xsup = Glu_persist->xsup; int_t ncols = 0; /* Count local number of nonzero columns */ int_t ldu = 0; /* Count local max. 
size of nonzero columns */ /*initialize perm_u*/ for (int i = 0; i < nsupers; ++i) perm_u[i] = i; for (int lk = myrow; lk < nsupers; lk += Pr ) { ncols = SUPERLU_MAX(ncols, num_full_cols_U(lk, Ufstnz_br_ptr, xsup, grid, perm_u, &ldu) ); } int_t max_ncols = 0; int_t max_ldu = 0; MPI_Allreduce(&ncols, &max_ncols, 1, mpi_int_t, MPI_MAX, grid->cscp.comm); MPI_Allreduce(&ldu, &max_ldu, 1, mpi_int_t, MPI_MAX, grid->cscp.comm); #if ( PRNTlevel>=1 ) printf("max_ncols %d, max_ldu %d, ldt %d, bigu_size=%d\n", max_ncols, max_ldu, ldt, max_ldu*max_ncols); #endif return(max_ldu * max_ncols); } SuperLU_DIST_5.3.0/SRC/pzgssvx_ABglobal.c0000644013363400111340000012217013233431301016701 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of linear equations A*X=B, * *
 * -- Distributed SuperLU routine (version 4.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Last modified:
 * December 31, 2015   version 4.3
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * pzgssvx_ABglobal solves a system of linear equations A*X=B,
 * by using Gaussian elimination with "static pivoting" to
 * compute the LU factorization of A.
 *
 * Static pivoting is a technique that combines the numerical stability
 * of partial pivoting with the scalability of Cholesky (no pivoting),
 * to run accurately and efficiently on large numbers of processors.
 *
 * See our paper at http://www.nersc.gov/~xiaoye/SuperLU/ for a detailed
 * description of the parallel algorithms.
 *
 * Here are the options for using this code:
 *
 *   1. Independent of all the other options specified below, the
 *      user must supply
 *
 *      -  B, the matrix of right hand sides, and its dimensions ldb and nrhs
 *      -  grid, a structure describing the 2D processor mesh
 *      -  options->IterRefine, which determines whether or not to
 *            improve the accuracy of the computed solution using 
 *            iterative refinement
 *
 *      On output, B is overwritten with the solution X.
 *
 *   2. Depending on options->Fact, the user has several options
 *      for solving A*X=B. The standard option is for factoring
 *      A "from scratch". (The other options, described below,
 *      are used when A is sufficiently similar to a previously 
 *      solved problem to save time by reusing part or all of 
 *      the previous factorization.)
 *
 *      -  options->Fact = DOFACT: A is factored "from scratch"
 *
 *      In this case the user must also supply
 *
 *      -  A, the input matrix
 *
 *      as well as the following options, which are described in more 
 *      detail below:
 *
 *      -  options->Equil,   to specify how to scale the rows and columns
 *                           of A to "equilibrate" it (to try to reduce its
 *                           condition number and so improve the
 *                           accuracy of the computed solution)
 *
 *      -  options->RowPerm, to specify how to permute the rows of A
 *                           (typically to control numerical stability)
 *
 *      -  options->ColPerm, to specify how to permute the columns of A
 *                           (typically to control fill-in and enhance
 *                           parallelism during factorization)
 *
 *      -  options->ReplaceTinyPivot, to specify how to deal with tiny
 *                           pivots encountered during factorization
 *                           (to control numerical stability)
 *
 *      The outputs returned include
 *         
 *      -  ScalePermstruct,  modified to describe how the input matrix A
 *                           was equilibrated and permuted:
 *         -  ScalePermstruct->DiagScale, indicates whether the rows and/or
 *                                        columns of A were scaled
 *         -  ScalePermstruct->R, array of row scale factors
 *         -  ScalePermstruct->C, array of column scale factors
 *         -  ScalePermstruct->perm_r, row permutation vector
 *         -  ScalePermstruct->perm_c, column permutation vector
 *
 *            (part of ScalePermstruct may also need to be supplied on input,
 *             depending on options->RowPerm and options->ColPerm as described 
 *             later).
 *
 *      -  A, the input matrix A overwritten by the scaled and permuted matrix
 *                Pc*Pr*diag(R)*A*diag(C)
 *             where 
 *                Pr and Pc are row and column permutation matrices determined
 *                  by ScalePermstruct->perm_r and ScalePermstruct->perm_c, 
 *                  respectively, and 
 *                diag(R) and diag(C) are diagonal scaling matrices determined
 *                  by ScalePermstruct->DiagScale, ScalePermstruct->R and 
 *                  ScalePermstruct->C
 *
 *      -  LUstruct, which contains the L and U factorization of A1 where
 *
 *                A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 *              (Note that A1 = Aout * Pc^T, where Aout is the matrix stored
 *               in A on output.)
 *
 *   3. The second value of options->Fact assumes that a matrix with the same
 *      sparsity pattern as A has already been factored:
 *     
 *      -  options->Fact = SamePattern: A is factored, assuming that it has
 *            the same nonzero pattern as a previously factored matrix. In this
 *            case the algorithm saves time by reusing the previously computed
 *            column permutation vector stored in ScalePermstruct->perm_c
 *            and the "elimination tree" of A stored in LUstruct->etree.
 *
 *      In this case the user must still specify the following options
 *      as before:
 *
 *      -  options->Equil
 *      -  options->RowPerm
 *      -  options->ReplaceTinyPivot
 *
 *      but not options->ColPerm, whose value is ignored. This is because the
 *      previous column permutation from ScalePermstruct->perm_c is used as
 *      input. The user must also supply 
 *
 *      -  A, the input matrix
 *      -  ScalePermstruct->perm_c, the column permutation
 *      -  LUstruct->etree, the elimination tree
 *
 *      The outputs returned include
 *         
 *      -  A, the input matrix A overwritten by the scaled and permuted matrix
 *            as described above
 *      -  ScalePermstruct,  modified to describe how the input matrix A was
 *                           equilibrated and row permuted
 *      -  LUstruct, modified to contain the new L and U factors
 *
 *   4. The third value of options->Fact assumes that a matrix B with the same
 *      sparsity pattern as A has already been factored, and where the
 *      row permutation of B can be reused for A. This is useful when A and B
 *      have similar numerical values, so that the same row permutation
 *      will make both factorizations numerically stable. This lets us reuse
 *      all of the previously computed structure of L and U.
 *
 *      -  options->Fact = SamePattern_SameRowPerm: A is factored,
 *            assuming not only the same nonzero pattern as the previously
 *            factored matrix B, but reusing B's row permutation.
 *
 *      In this case the user must still specify the following options
 *      as before:
 *
 *      -  options->Equil
 *      -  options->ReplaceTinyPivot
 *
 *      but not options->RowPerm or options->ColPerm, whose values are ignored.
 *      This is because the permutations from ScalePermstruct->perm_r and
 *      ScalePermstruct->perm_c are used as input.
 *
 *      The user must also supply 
 *
 *      -  A, the input matrix
 *      -  ScalePermstruct->DiagScale, how the previous matrix was row and/or
 *                                     column scaled
 *      -  ScalePermstruct->R, the row scalings of the previous matrix, if any
 *      -  ScalePermstruct->C, the column scalings of the previous matrix,
 *                             if any
 *      -  ScalePermstruct->perm_r, the row permutation of the previous matrix
 *      -  ScalePermstruct->perm_c, the column permutation of the previous 
 *                                  matrix
 *      -  all of LUstruct, the previously computed information about L and U
 *                (the actual numerical values of L and U stored in
 *                 LUstruct->Llu are ignored)
 *
 *      The outputs returned include
 *         
 *      -  A, the input matrix A overwritten by the scaled and permuted matrix
 *            as described above
 *      -  ScalePermstruct,  modified to describe how the input matrix A was
 *                           equilibrated 
 *                  (thus ScalePermstruct->DiagScale, R and C may be modified)
 *      -  LUstruct, modified to contain the new L and U factors
 *
 *   5. The fourth and last value of options->Fact assumes that A is
 *      identical to a matrix that has already been factored on a previous 
 *      call, and reuses its entire LU factorization
 *
 *      -  options->Fact = FACTORED: A is identical to a previously
 *            factorized matrix, so the entire previous factorization
 *            can be reused.
 *
 *      In this case all the other options mentioned above are ignored
 *      (options->Equil, options->RowPerm, options->ColPerm, 
 *       options->ReplaceTinyPivot)
 *
 *      The user must also supply 
 *
 *      -  A, the unfactored matrix, only in the case that iterative refinement
 *            is to be done (specifically A must be the output A from 
 *            the previous call, so that it has been scaled and permuted)
 *      -  all of ScalePermstruct
 *      -  all of LUstruct, including the actual numerical values of L and U
 *
 *      all of which are unmodified on output.
 *         
 * Arguments
 * =========
 *
 * options (input) superlu_dist_options_t*
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following fields should be defined for this structure:
 *         
 *         o Fact (fact_t)
 *           Specifies whether or not the factored form of the matrix
 *           A is supplied on entry, and if not, how the matrix A should
 *           be factorized based on the previous history.
 *
 *           = DOFACT: The matrix A will be factorized from scratch.
 *                 Inputs:  A
 *                          options->Equil, RowPerm, ColPerm, ReplaceTinyPivot
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          all of ScalePermstruct
 *                          all of LUstruct
 *
 *           = SamePattern: the matrix A will be factorized assuming
 *             that a factorization of a matrix with the same sparsity
 *             pattern was performed prior to this one. Therefore, this
 *             factorization will reuse column permutation vector 
 *             ScalePermstruct->perm_c and the elimination tree
 *             LUstruct->etree
 *                 Inputs:  A
 *                          options->Equil, RowPerm, ReplaceTinyPivot
 *                          ScalePermstruct->perm_c
 *                          LUstruct->etree
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          rest of ScalePermstruct (DiagScale, R, C, perm_r)
 *                          rest of LUstruct (GLU_persist, Llu)
 *
 *           = SamePattern_SameRowPerm: the matrix A will be factorized
 *             assuming that a factorization of a matrix with the same
 *             sparsity	pattern and similar numerical values was performed
 *             prior to this one. Therefore, this factorization will reuse
 *             both the row and column scaling factors R and C, both the
 *             row and column permutation vectors perm_r and perm_c, and
 *             the distributed data structure set up from the previous
 *             symbolic factorization.
 *                 Inputs:  A
 *                          options->Equil, ReplaceTinyPivot
 *                          all of ScalePermstruct
 *                          all of LUstruct
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          modified LUstruct->Llu
 *           = FACTORED: the matrix A is already factored.
 *                 Inputs:  all of ScalePermstruct
 *                          all of LUstruct
 *
 *         o Equil (yes_no_t)
 *           Specifies whether to equilibrate the system.
 *           = NO:  no equilibration.
 *           = YES: scaling factors are computed to equilibrate the system:
 *                      diag(R)*A*diag(C)*inv(diag(C))*X = diag(R)*B.
 *                  Whether or not the system will be equilibrated depends
 *                  on the scaling of the matrix A, but if equilibration is
 *                  used, A is overwritten by diag(R)*A*diag(C) and B by
 *                  diag(R)*B.
 *
 *         o RowPerm (rowperm_t)
 *           Specifies how to permute rows of the matrix A.
 *           = NOROWPERM: use the natural ordering.
 *           = LargeDiag: use the Duff/Koster algorithm to permute rows of
 *                        the original matrix to make the diagonal large
 *                        relative to the off-diagonal.
 *           = MY_PERMR:  use the ordering given in ScalePermstruct->perm_r
 *                        input by the user.
 *           
 *         o ColPerm (colperm_t)
 *           Specifies what type of column permutation to use to reduce fill.
 *           = NATURAL:       natural ordering.
 *           = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A.
 *           = MMD_ATA:       minimum degree ordering on structure of A'*A.
 *           = MY_PERMC:      the ordering given in ScalePermstruct->perm_c.
 *         
 *         o ReplaceTinyPivot (yes_no_t)
 *           = NO:  do not modify pivots
 *           = YES: replace tiny pivots by sqrt(epsilon)*norm(A) during 
 *                  LU factorization.
 *
 *         o IterRefine (IterRefine_t)
 *           Specifies how to perform iterative refinement.
 *           = NOREFINE:   no iterative refinement.
 *           = SLU_DOUBLE: accumulate residual in double precision.
 *           = SLU_EXTRA:  accumulate residual in extra precision.
 *
 *         NOTE: all options must be identical on all processes when
 *               calling this routine.
 *
 * A (input/output) SuperMatrix*
 *         On entry, matrix A in A*X=B, of dimension (A->nrow, A->ncol).
 *         The number of linear equations is A->nrow. The type of A must be:
 *         Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE. That is, A is stored in
 *         compressed column format (also known as Harwell-Boeing format).
 *         See supermatrix.h for the definition of 'SuperMatrix'.
 *         This routine only handles square A, however, the LU factorization
 *         routine pzgstrf can factorize rectangular matrices.
 *         On exit, A may be overwritten by Pc*Pr*diag(R)*A*diag(C),
 *         depending on ScalePermstruct->DiagScale, options->RowPerm and
 *         options->ColPerm:
 *             if ScalePermstruct->DiagScale != NOEQUIL, A is overwritten by
 *                diag(R)*A*diag(C).
 *             if options->RowPerm != NOROWPERM, A is further overwritten by
 *                Pr*diag(R)*A*diag(C).
 *             if options->ColPerm != NATURAL, A is further overwritten by
 *                Pc*Pr*diag(R)*A*diag(C).
 *         If all the above conditions are true, the LU decomposition is
 *         performed on the matrix Pc*Pr*diag(R)*A*diag(C)*Pc^T.
 *
 *         NOTE: Currently, A must reside in all processes when calling
 *               this routine.
 *
 * ScalePermstruct (input/output) ScalePermstruct_t*
 *         The data structure to store the scaling and permutation vectors
 *         describing the transformations performed to the matrix A.
 *         It contains the following fields:
 *
 *         o DiagScale (DiagScale_t)
 *           Specifies the form of equilibration that was done.
 *           = NOEQUIL: no equilibration.
 *           = ROW:     row equilibration, i.e., A was premultiplied by
 *                      diag(R).
 *           = COL:     Column equilibration, i.e., A was postmultiplied
 *                      by diag(C).
 *           = BOTH:    both row and column equilibration, i.e., A was 
 *                      replaced by diag(R)*A*diag(C).
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm,
 *           DiagScale is an input argument; otherwise it is an output
 *           argument.
 *
 *         o perm_r (int*)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *           If options->RowPerm = MY_PERMR, or
 *           options->Fact = SamePattern_SameRowPerm, perm_r is an
 *           input argument; otherwise it is an output argument.
 *
 *         o perm_c (int*)
 *           Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *           If options->ColPerm = MY_PERMC or options->Fact = SamePattern
 *           or options->Fact = SamePattern_SameRowPerm, perm_c is an
 *           input argument; otherwise, it is an output argument.
 *           On exit, perm_c may be overwritten by the product of the input
 *           perm_c and a permutation that postorders the elimination tree
 *           of Pc*A'*A*Pc'; perm_c is not changed if the elimination tree
 *           is already in postorder.
 *
 *         o R (double*) dimension (A->nrow)
 *           The row scale factors for A.
 *           If DiagScale = ROW or BOTH, A is multiplied on the left by 
 *                          diag(R).
 *           If DiagScale = NOEQUIL or COL, R is not defined.
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm, R is
 *           an input argument; otherwise, R is an output argument.
 *
 *         o C (double*) dimension (A->ncol)
 *           The column scale factors for A.
 *           If DiagScale = COL or BOTH, A is multiplied on the right by 
 *                          diag(C).
 *           If DiagScale = NOEQUIL or ROW, C is not defined.
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm, C is
 *           an input argument; otherwise, C is an output argument.
 *         
 * B       (input/output) doublecomplex*
 *         On entry, the right-hand side matrix of dimension (A->nrow, nrhs).
 *         On exit, the solution matrix if info = 0;
 *
 *         NOTE: Currently, B must reside in all processes when calling
 *               this routine.
 *
 * ldb     (input) int (global)
 *         The leading dimension of matrix B.
 *
 * nrhs    (input) int (global)
 *         The number of right-hand sides.
 *         If nrhs = 0, only LU decomposition is performed, the forward
 *         and back substitutions are skipped.
 *
 * grid    (input) gridinfo_t*
 *         The 2D process mesh. It contains the MPI communicator, the number
 *         of process rows (NPROW), the number of process columns (NPCOL),
 *         and my process rank. It is an input argument to all the
 *         parallel routines.
 *         Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *         See superlu_zdefs.h for the definition of 'gridinfo_t'.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         It contains the following fields:
 *
 *         o etree (int*) dimension (A->ncol)
 *           Elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc', dimension A->ncol.
 *           It is computed in sp_colorder() during the first factorization,
 *           and is reused in the subsequent factorizations of the matrices
 *           with the same nonzero pattern.
 *           On exit of sp_colorder(), the columns of A are permuted so that
 *           the etree is in a certain postorder. This postorder is reflected
 *           in ScalePermstruct->perm_c.
 *           NOTE:
 *           Etree is a vector of parent pointers for a forest whose vertices
 *           are the integers 0 to A->ncol-1; etree[root]==A->ncol.
 *
 *         o Glu_persist (Glu_persist_t*)
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *	       xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (LocalLU_t*)
 *           The distributed data structures to store L and U factors.
 *           See superlu_zdefs.h for the definition of 'LocalLU_t'.
 *
 * berr    (output) double*, dimension (nrhs)
 *         The componentwise relative backward error of each solution   
 *         vector X(j) (i.e., the smallest relative change in   
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info    (output) int*
 *         = 0: successful exit
 *         > 0: if info = i, and i is
 *             <= A->ncol: U(i,i) is exactly zero. The factorization has
 *                been completed, but the factor U is exactly singular,
 *                so the solution could not be computed.
 *             > A->ncol: number of bytes allocated when memory allocation
 *                failure occurred, plus A->ncol.
 *
 *
 * See superlu_zdefs.h for the definitions of various data types.
 * 
*/ void pzgssvx_ABglobal(superlu_dist_options_t *options, SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, doublecomplex B[], int ldb, int nrhs, gridinfo_t *grid, LUstruct_t *LUstruct, double *berr, SuperLUStat_t *stat, int *info) { SuperMatrix AC; NCformat *Astore; NCPformat *ACstore; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; Glu_freeable_t *Glu_freeable; /* The nonzero structures of L and U factors, which are replicated on all processrs. (lsub, xlsub) contains the compressed subscript of supernodes in L. (usub, xusub) contains the compressed subscript of nonzero segments in U. If options->Fact != SamePattern_SameRowPerm, they are computed by SYMBFACT routine, and then used by DDISTRIBUTE routine. They will be freed after DDISTRIBUTE routine. If options->Fact == SamePattern_SameRowPerm, these structures are not used. */ fact_t Fact; doublecomplex *a; int_t *perm_r; /* row permutations from partial pivoting */ int_t *perm_c; /* column permutation vector */ int_t *etree; /* elimination tree */ int_t *colptr, *rowind; int_t Equil, factored, job, notran, colequ, rowequ; int_t i, iinfo, j, irow, m, n, nnz, permc_spec, dist_mem_use; int iam; int ldx; /* LDA for matrix X (global). */ char equed[1], norm[1]; double *C, *R, *C1, *R1, amax, anorm, colcnd, rowcnd; doublecomplex *X, *b_col, *b_work, *x_col; double t; static superlu_dist_mem_usage_t num_mem_usage, symb_mem_usage; #if ( PRNTlevel>= 2 ) double dmin, dsum, dprod; #endif /* Test input parameters. 
*/ *info = 0; Fact = options->Fact; if ( Fact < 0 || Fact > FACTORED ) *info = -1; else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR ) *info = -1; else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC ) *info = -1; else if ( options->IterRefine < 0 || options->IterRefine > SLU_EXTRA ) *info = -1; else if ( options->IterRefine == SLU_EXTRA ) { *info = -1; fprintf(stderr, "Extra precise iterative refinement yet to support."); } else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NC || A->Dtype != SLU_Z || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < A->nrow ) *info = -5; else if ( nrhs < 0 ) *info = -6; if ( *info ) { i = -(*info); pxerr_dist("pzgssvx_ABglobal", grid, -*info); return; } /* Initialization */ factored = (Fact == FACTORED); Equil = (!factored && options->Equil == YES); notran = (options->Trans == NOTRANS); iam = grid->iam; job = 5; m = A->nrow; n = A->ncol; Astore = A->Store; nnz = Astore->nnz; a = Astore->nzval; colptr = Astore->colptr; rowind = Astore->rowind; if ( factored || (Fact == SamePattern_SameRowPerm && Equil) ) { rowequ = (ScalePermstruct->DiagScale == ROW) || (ScalePermstruct->DiagScale == BOTH); colequ = (ScalePermstruct->DiagScale == COL) || (ScalePermstruct->DiagScale == BOTH); } else rowequ = colequ = FALSE; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgssvx_ABglobal()"); #endif perm_r = ScalePermstruct->perm_r; perm_c = ScalePermstruct->perm_c; etree = LUstruct->etree; R = ScalePermstruct->R; C = ScalePermstruct->C; if ( Equil && Fact != SamePattern_SameRowPerm ) { /* Allocate storage if not done so before. 
*/ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->R = R; ScalePermstruct->C = C; break; case ROW: if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->C = C; break; case COL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); ScalePermstruct->R = R; break; } } /* ------------------------------------------------------------ Diagonal scaling to equilibrate the matrix. ------------------------------------------------------------*/ if ( Equil ) { #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter equil"); #endif t = SuperLU_timer_(); if ( Fact == SamePattern_SameRowPerm ) { /* Reuse R and C. */ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: break; case ROW: for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; zd_mult(&a[i], &a[i], R[i]); /* Scale rows. */ } } break; case COL: for (j = 0; j < n; ++j) for (i = colptr[j]; i < colptr[j+1]; ++i) zd_mult(&a[i], &a[i], C[j]); /* Scale columns. */ break; case BOTH: for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; zd_mult(&a[i], &a[i], R[irow]); /* Scale rows. */ zd_mult(&a[i], &a[i], C[j]); /* Scale columns. */ } } break; } } else { if ( !iam ) { /* Compute row and column scalings to equilibrate matrix A. 
*/ zgsequ_dist(A, R, C, &rowcnd, &colcnd, &amax, &iinfo); MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm ); } else { if ( iinfo > 0 ) { if ( iinfo <= m ) { #if ( PRNTlevel>=1 ) fprintf(stderr, "The " IFMT "-th row of A is exactly zero\n", iinfo); #endif } else { #if ( PRNTlevel>=1 ) fprintf(stderr, "The " IFMT "-th column of A is exactly zero\n", iinfo-n); #endif } } } } else { MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm ); } } if ( iinfo == 0 ) { /* Equilibrate matrix A. */ zlaqgs_dist(A, R, C, rowcnd, colcnd, amax, equed); if ( strncmp(equed, "R", 1)==0 ) { ScalePermstruct->DiagScale = ROW; rowequ = ROW; } else if ( strncmp(equed, "C", 1)==0 ) { ScalePermstruct->DiagScale = COL; colequ = COL; } else if ( strncmp(equed, "B", 1)==0 ) { ScalePermstruct->DiagScale = BOTH; rowequ = ROW; colequ = COL; } else ScalePermstruct->DiagScale = NOEQUIL; } #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. equilibrated? *equed = %c\n", *equed); /*fflush(stdout);*/ } #endif } /* if Fact ... */ stat->utime[EQUIL] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit equil"); #endif } /* end if Equil ... */ /* ------------------------------------------------------------ Permute rows of A. ------------------------------------------------------------*/ if ( options->RowPerm != NO ) { t = SuperLU_timer_(); if ( Fact == SamePattern_SameRowPerm /* Reuse perm_r. */ || options->RowPerm == MY_PERMR ) { /* Use my perm_r. 
*/ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } } else if ( !factored ) { if ( job == 5 ) { /* Allocate storage for scaling factors. */ if ( !(R1 = (double *) SUPERLU_MALLOC(m * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R1[]"); if ( !(C1 = (double *) SUPERLU_MALLOC(n * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C1[]"); } if ( !iam ) { /* Process 0 finds a row permutation for large diagonal. */ iinfo = zldperm_dist(job, m, nnz, colptr, rowind, a, perm_r, R1, C1); MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } } else { MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } } if ( iinfo && job == 5) { SUPERLU_FREE(R1); SUPERLU_FREE(C1); } #if ( PRNTlevel>=2 ) dmin = dmach_dist("Overflow"); dsum = 0.0; dprod = 1.0; #endif if ( iinfo == 0 ) { if ( job == 5 ) { if ( Equil ) { for (i = 0; i < n; ++i) { R1[i] = exp(R1[i]); C1[i] = exp(C1[i]); } for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; zd_mult(&a[i], &a[i], R1[irow]); /* Scale rows. */ zd_mult(&a[i], &a[i], C1[j]); /* Scale columns. */ rowind[i] = perm_r[irow]; #if ( PRNTlevel>=2 ) if ( rowind[i] == j ) /* New diagonal */ dprod *= slud_z_abs1(&a[i]); #endif } } /* Multiply together the scaling factors. */ if ( rowequ ) for (i = 0; i < m; ++i) R[i] *= R1[i]; else for (i = 0; i < m; ++i) R[i] = R1[i]; if ( colequ ) for (i = 0; i < n; ++i) C[i] *= C1[i]; else for (i = 0; i < n; ++i) C[i] = C1[i]; ScalePermstruct->DiagScale = BOTH; rowequ = colequ = 1; } else { /* No equilibration. 
*/ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } } SUPERLU_FREE (R1); SUPERLU_FREE (C1); } else { /* job = 2,3,4 */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; #if ( PRNTlevel>=2 ) if ( rowind[i] == j ) { /* New diagonal */ if ( job == 2 || job == 3 ) dmin = SUPERLU_MIN(dmin, slud_z_abs1(&a[i])); else if ( job == 4 ) dsum += slud_z_abs1(&a[i]); else if ( job == 5 ) dprod *= slud_z_abs1(&a[i]); } #endif } /* end for i ... */ } /* end for j ... */ } /* end else */ } else { /* if iinfo != 0 */ for (i = 0; i < m; ++i) perm_r[i] = i; } #if ( PRNTlevel>=2 ) if ( job == 2 || job == 3 ) { if ( !iam ) printf("\tsmallest diagonal %e\n", dmin); } else if ( job == 4 ) { if ( !iam ) printf("\tsum of diagonal %e\n", dsum); } else if ( job == 5 ) { if ( !iam ) printf("\t product of diagonal %e\n", dprod); } #endif } /* else !factored */ t = SuperLU_timer_() - t; stat->utime[ROWPERM] = t; } else { /* options->RowPerm == NOROWPERM */ for (i = 0; i < m; ++i) perm_r[i] = i; } if ( !factored || options->IterRefine ) { /* Compute norm(A), which will be used to adjust small diagonal. */ if ( notran ) *(unsigned char *)norm = '1'; else *(unsigned char *)norm = 'I'; anorm = zlangs_dist(norm, A); } /* ------------------------------------------------------------ Perform the LU factorization. 
------------------------------------------------------------*/ if ( !factored ) { t = SuperLU_timer_(); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && Fact == DOFACT ) /* Use an ordering provided by SuperLU */ get_perm_c_dist(iam, permc_spec, A, perm_c); /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc' (a.k.a. column etree), depending on the choice of ColPerm. Adjust perm_c[] to be consistent with a postorder of etree. Permute columns of A to form A*Pc'. */ sp_colorder(options, A, perm_c, etree, &AC); /* Form Pc*A*Pc' to preserve the diagonal of the matrix Pr*A. */ ACstore = AC.Store; for (j = 0; j < n; ++j) for (i = ACstore->colbeg[j]; i < ACstore->colend[j]; ++i) { irow = ACstore->rowind[i]; ACstore->rowind[i] = perm_c[irow]; } stat->utime[COLPERM] = SuperLU_timer_() - t; /* Perform a symbolic factorization on matrix A and set up the nonzero data structures which are suitable for supernodal GENP. */ if ( Fact != SamePattern_SameRowPerm ) { #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. 
symbfact(): relax " IFMT ", maxsuper " IFMT ", fill " IFMT "\n", sp_ienv_dist(2), sp_ienv_dist(3), sp_ienv_dist(6)); #endif t = SuperLU_timer_(); if ( !(Glu_freeable = (Glu_freeable_t *) SUPERLU_MALLOC(sizeof(Glu_freeable_t))) ) ABORT("Malloc fails for Glu_freeable."); iinfo = symbfact(options, iam, &AC, perm_c, etree, Glu_persist, Glu_freeable); stat->utime[SYMBFAC] = SuperLU_timer_() - t; if ( iinfo <= 0 ) { QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage); #if ( PRNTlevel>=1 ) if ( !iam ) { printf("\tNo of supers %ld\n", (long long)Glu_persist->supno[n-1]+1); printf("\tSize of G(L) %ld\n", (long long)Glu_freeable->xlsub[n]); printf("\tSize of G(U) %ld\n", (long long)Glu_freeable->xusub[n]); printf("\tint %d, short %d, float %d, double %d\n", (int) sizeof(int_t), (int) sizeof(short), (int) sizeof(float), (int) sizeof(double)); printf("\tSYMBfact (MB):\tL\\U %.2f\ttotal %.2f\texpansions " IFMT "\n", symb_mem_usage.for_lu*1e-6, symb_mem_usage.total*1e-6, symb_mem_usage.expansions); } #endif } else { /* symbfact out of memory */ #if ( PRNTlevel>=1 ) if ( !iam ) fprintf(stderr, "symbfact() error returns " IFMT "\n", iinfo); #endif *info = iinfo; return; } } /* Distribute the L and U factors onto the process grid. */ t = SuperLU_timer_(); dist_mem_use = zdistribute(Fact, n, &AC, Glu_freeable, LUstruct, grid); stat->utime[DIST] = SuperLU_timer_() - t; /* Deallocate storage used in symbolic factor. */ if ( Fact != SamePattern_SameRowPerm ) { iinfo = symbfact_SubFree(Glu_freeable); SUPERLU_FREE(Glu_freeable); } /* Perform numerical factorization in parallel. 
*/ t = SuperLU_timer_(); pzgstrf(options, m, n, anorm, LUstruct, grid, stat, info); stat->utime[FACT] = SuperLU_timer_() - t; #if ( PRNTlevel>=1 ) { int_t TinyPivots; float for_lu, total, max, avg, temp; zQuerySpace_dist(n, LUstruct, grid, stat, &num_mem_usage); MPI_Reduce( &num_mem_usage.for_lu, &for_lu, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &num_mem_usage.total, &total, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); temp = SUPERLU_MAX(symb_mem_usage.total, symb_mem_usage.for_lu + (float)dist_mem_use + num_mem_usage.for_lu); temp = SUPERLU_MAX(temp, num_mem_usage.total); MPI_Reduce( &temp, &max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &temp, &avg, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Allreduce( &stat->TinyPivots, &TinyPivots, 1, mpi_int_t, MPI_SUM, grid->comm ); stat->TinyPivots = TinyPivots; if ( !iam ) { printf("\tNUMfact (MB) all PEs:\tL\\U\t%.2f\tall\t%.2f\n", for_lu*1e-6, total*1e-6); printf("\tAll space (MB):" "\t\ttotal\t%.2f\tAvg\t%.2f\tMax\t%.2f\n", avg*1e-6, avg/grid->nprow/grid->npcol*1e-6, max*1e-6); printf("\tNumber of tiny pivots: %10d\n", stat->TinyPivots); printf(".. pzgstrf INFO = %d\n", *info); } } #endif } else if ( options->IterRefine ) { /* options->Fact==FACTORED */ /* Permute columns of A to form A*Pc' using the existing perm_c. * NOTE: rows of A were previously permuted to Pc*A. */ sp_colorder(options, A, perm_c, NULL, &AC); } /* if !factored ... */ /* ------------------------------------------------------------ Compute the solution matrix X. ------------------------------------------------------------*/ if ( nrhs && *info == 0 ) { if ( !(b_work = doublecomplexMalloc_dist(n)) ) ABORT("Malloc fails for b_work[]"); /* ------------------------------------------------------------ Scale the right-hand side if equilibration was performed. 
------------------------------------------------------------*/ if ( notran ) { if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) zd_mult(&b_col[i], &b_col[i], R[i]); b_col += ldb; } } } else if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) zd_mult(&b_col[i], &b_col[i], C[i]); b_col += ldb; } } /* ------------------------------------------------------------ Permute the right-hand side to form Pr*B. ------------------------------------------------------------*/ if ( options->RowPerm != NO ) { if ( notran ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) b_work[perm_r[i]] = b_col[i]; for (i = 0; i < m; ++i) b_col[i] = b_work[i]; b_col += ldb; } } } /* ------------------------------------------------------------ Permute the right-hand side to form Pc*B. ------------------------------------------------------------*/ if ( notran ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) b_work[perm_c[i]] = b_col[i]; for (i = 0; i < m; ++i) b_col[i] = b_work[i]; b_col += ldb; } } /* Save a copy of the right-hand side. */ ldx = ldb; if ( !(X = doublecomplexMalloc_dist(((size_t)ldx) * nrhs)) ) ABORT("Malloc fails for X[]"); x_col = X; b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < ldb; ++i) x_col[i] = b_col[i]; x_col += ldx; b_col += ldb; } /* ------------------------------------------------------------ Solve the linear system. ------------------------------------------------------------*/ pzgstrs_Bglobal(n, LUstruct, grid, X, ldb, nrhs, stat, info); /* ------------------------------------------------------------ Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. ------------------------------------------------------------*/ if ( options->IterRefine ) { /* Improve the solution by iterative refinement. 
*/ t = SuperLU_timer_(); pzgsrfs_ABXglobal(n, &AC, anorm, LUstruct, grid, B, ldb, X, ldx, nrhs, berr, stat, info); stat->utime[REFINE] = SuperLU_timer_() - t; } /* Permute the solution matrix X <= Pc'*X. */ for (j = 0; j < nrhs; j++) { b_col = &B[j*ldb]; x_col = &X[j*ldx]; for (i = 0; i < n; ++i) b_col[i] = x_col[perm_c[i]]; } /* Transform the solution matrix X to a solution of the original system before the equilibration. */ if ( notran ) { if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < n; ++i) zd_mult(&b_col[i], &b_col[i], C[i]); b_col += ldb; } } } else if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < n; ++i) zd_mult(&b_col[i], &b_col[i], R[i]); b_col += ldb; } } SUPERLU_FREE(b_work); SUPERLU_FREE(X); } /* end if nrhs != 0 */ #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. DiagScale = %d\n", ScalePermstruct->DiagScale); #endif /* Deallocate R and/or C if it is not used. */ if ( Equil && Fact != SamePattern_SameRowPerm ) { switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: SUPERLU_FREE(R); SUPERLU_FREE(C); break; case ROW: SUPERLU_FREE(C); break; case COL: SUPERLU_FREE(R); break; } } if ( !factored || (factored && options->IterRefine) ) Destroy_CompCol_Permuted_dist(&AC); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgssvx_ABglobal()"); #endif } SuperLU_DIST_5.3.0/SRC/zlaqgs_dist.c0000644013363400111340000001016313233431301015754 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Equilibrates a general sparse M by N matrix A */ /* * File name: zlaqgs.c * History: Modified from LAPACK routine ZLAQGE */ #include #include "superlu_zdefs.h" /*! \brief
    Purpose   
    =======   

    ZLAQGS_DIST equilibrates a general sparse M by N matrix A using the row
    and column scaling factors in the vectors R and C.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.

    Arguments   
    =========   

    A       (input/output) SuperMatrix*
            On exit, the equilibrated matrix.  See EQUED for the form of 
            the equilibrated matrix. The type of A can be:
	    Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE.
	    
    R       (input) double*, dimension (A->nrow)
            The row scale factors for A.
	    
    C       (input) double*, dimension (A->ncol)
            The column scale factors for A.
	    
    ROWCND  (input) double
            Ratio of the smallest R(i) to the largest R(i).
	    
    COLCND  (input) double
            Ratio of the smallest C(i) to the largest C(i).
	    
    AMAX    (input) double
            Absolute value of largest matrix entry.
	    
    EQUED   (output) char*
            Specifies the form of equilibration that was done.   
            = 'N':  No equilibration   
            = 'R':  Row equilibration, i.e., A has been premultiplied by  
                    diag(R).   
            = 'C':  Column equilibration, i.e., A has been postmultiplied  
                    by diag(C).   
            = 'B':  Both row and column equilibration, i.e., A has been
                    replaced by diag(R) * A * diag(C).   

    Internal Parameters   
    ===================   

    THRESH is a threshold value used to decide if row or column scaling   
    should be done based on the ratio of the row or column scaling   
    factors.  If ROWCND < THRESH, row scaling is done, and if   
    COLCND < THRESH, column scaling is done.   

    LARGE and SMALL are threshold values used to decide if row scaling   
    should be done based on the absolute size of the largest matrix   
    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   

    ===================================================================== 
*/ void zlaqgs_dist(SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) { #define THRESH (0.1) /* Local variables */ NCformat *Astore; doublecomplex *Aval; int i, j, irow; double large, small, cj; double temp; /* Quick return if possible */ if (A->nrow <= 0 || A->ncol <= 0) { *(unsigned char *)equed = 'N'; return; } Astore = (NCformat *) A->Store; Aval = (doublecomplex *) Astore->nzval; /* Initialize LARGE and SMALL. */ small = dmach_dist("Safe minimum") / dmach_dist("Precision"); large = 1. / small; if (rowcnd >= THRESH && amax >= small && amax <= large) { if (colcnd >= THRESH) *(unsigned char *)equed = 'N'; else { /* Column scaling */ for (j = 0; j < A->ncol; ++j) { cj = c[j]; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { zd_mult(&Aval[i], &Aval[i], cj); } } *(unsigned char *)equed = 'C'; } } else if (colcnd >= THRESH) { /* Row scaling, no column scaling */ for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; zd_mult(&Aval[i], &Aval[i], r[irow]); } *(unsigned char *)equed = 'R'; } else { /* Row and column scaling */ for (j = 0; j < A->ncol; ++j) { cj = c[j]; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; temp = cj * r[irow]; zd_mult(&Aval[i], &Aval[i], temp); } } *(unsigned char *)equed = 'B'; } return; } /* zlaqgs_dist */ SuperLU_DIST_5.3.0/SRC/pzgsmv_AXglobal.c0000644013363400111340000002215213233431301016530 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs sparse matrix-vector multiplication * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include #include "superlu_zdefs.h" static void zcreate_msr_matrix(SuperMatrix *, int_t [], int_t, doublecomplex **, int_t **); static void zPrintMSRmatrix(int, doublecomplex [], int_t [], gridinfo_t *); int pzgsmv_AXglobal_setup ( SuperMatrix *A, /* Matrix A permuted by columns (input). The type of A can be: Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE. */ Glu_persist_t *Glu_persist, /* input */ gridinfo_t *grid, /* input */ int_t *m, /* output */ int_t *update[], /* output */ doublecomplex *val[], /* output */ int_t *bindx[], /* output */ int_t *mv_sup_to_proc /* output */ ) { int n; int input_option; int N_update; /* Number of variables updated on this process (output) */ int iam = grid->iam; int nprocs = grid->nprow * grid->npcol; int_t *xsup = Glu_persist->xsup; int_t *supno = Glu_persist->supno; int_t nsupers; int i, nsup, p, t1, t2, t3; /* Initialize the list of global indices. * NOTE: the list of global indices must be in ascending order. */ n = A->nrow; input_option = SUPER_LINEAR; nsupers = supno[n-1] + 1; #if ( DEBUGlevel>=2 ) if ( !iam ) { PrintInt10("xsup", supno[n-1]+1, xsup); PrintInt10("supno", n, supno); } #endif if ( input_option == SUPER_LINEAR ) { /* Block partitioning based on individual rows. */ /* Figure out mv_sup_to_proc[] on all processes. */ for (p = 0; p < nprocs; ++p) { t1 = n / nprocs; /* Number of rows */ t2 = n - t1 * nprocs; /* left-over, which will be assigned to the first t2 processes. */ if ( p >= t2 ) t2 += (p * t1); /* Starting row number */ else { /* First t2 processes will get one more row. */ ++t1; /* Number of rows. */ t2 = p * t1; /* Starting row. */ } /* Make sure the starting and ending rows are at the supernode boundaries. */ t3 = t2 + t1; /* Ending row. */ nsup = supno[t2]; if ( t2 > xsup[nsup] ) { /* Round up the starting row. */ t1 -= xsup[nsup+1] - t2; t2 = xsup[nsup+1]; } nsup = supno[t3]; if ( t3 > xsup[nsup] ) /* Round up the ending row. 
*/ t1 += xsup[nsup+1] - t3; t3 = t2 + t1 - 1; if ( t1 ) { for (i = supno[t2]; i <= supno[t3]; ++i) { mv_sup_to_proc[i] = p; #if ( DEBUGlevel>=3 ) if ( mv_sup_to_proc[i] == p-1 ) { fprintf(stderr, "mv_sup_to_proc conflicts at supno %d\n", i); exit(-1); } #endif } } if ( iam == p ) { N_update = t1; if ( N_update ) { if ( !(*update = intMalloc_dist(N_update)) ) ABORT("Malloc fails for update[]"); } for (i = 0; i < N_update; ++i) (*update)[i] = t2 + i; #if ( DEBUGlevel>=3 ) printf("(%2d) N_update = %4d\t" "supers %4d to %4d\trows %4d to %4d\n", iam, N_update, supno[t2], supno[t3], t2, t3); #endif } } /* for p ... */ } else if ( input_option == SUPER_BLOCK ) { /* Block partitioning based on individual supernodes. */ /* This may cause bad load balance, because the blocks are usually small in the beginning and large toward the end. */ t1 = nsupers / nprocs; t2 = nsupers - t1 * nprocs; /* left-over */ if ( iam >= t2 ) t2 += (iam * t1); else { ++t1; /* Number of blocks. */ t2 = iam * t1; /* Starting block. */ } N_update = xsup[t2+t1] - xsup[t2]; if ( !(*update = intMalloc_dist(N_update)) ) ABORT("Malloc fails for update[]"); for (i = 0; i < N_update; ++i) (*update)[i] = xsup[t2] + i; } /* Create an MSR matrix in val/bindx to be used by pdgsmv(). */ zcreate_msr_matrix(A, *update, N_update, val, bindx); #if ( DEBUGlevel>=2 ) PrintInt10("mv_sup_to_proc", nsupers, mv_sup_to_proc); zPrintMSRmatrix(N_update, *val, *bindx, grid); #endif *m = N_update; return 0; } /* PZGSMV_AXglobal_SETUP */ /*! \brief * *
 * Create the distributed modified sparse row (MSR) matrix: bindx/val.
 * For a submatrix of size m-by-n, the MSR arrays are as follows:
 *    bindx[0]      = m + 1
 *    bindx[0..m]   = pointer to start of each row
 *    bindx[ks..ke] = column indices of the off-diagonal nonzeros in row k,
 *                    where, ks = bindx[k], ke = bindx[k+1]-1
 *    val[k]        = A(k,k), k < m, diagonal elements
 *    val[m]        = not used
 *    val[ki]       = A(k, bindx[ki]), where ks <= ki <= ke
 * Both arrays are of length nnz + 1.
 * 
*/
static void zcreate_msr_matrix
(
 SuperMatrix *A,       /* Matrix A permuted by columns (input).
                          The type of A can be:
                          Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE. */
 int_t update[],       /* input (local) */
 int_t N_update,       /* input (local) */
 doublecomplex **val,  /* output */
 int_t **bindx         /* output */
 )
{
    /* All of these hold column pointers, row indices or nonzero counts
       taken from int_t arrays, so they use int_t as well: a plain int
       truncates in a 64-bit-index build once the counts pass INT_MAX. */
    int_t hi, i, irow, j, k, lo, n, nnz_local, nnz_diag;
    NCPformat *Astore;
    doublecomplex *nzval;
    int_t *rowcnt;
    doublecomplex zero = {0.0, 0.0};

    /* Nothing is owned locally: *val and *bindx are left untouched. */
    if ( !N_update ) return;

    n = A->ncol;
    Astore = A->Store;
    nzval = Astore->nzval;

    /* One pass of original matrix A to count nonzeros of each row. */
    if ( !(rowcnt = (int_t *) intCalloc_dist(N_update)) )
        ABORT("Malloc fails for rowcnt[]");
    /* Only the first and last entries of update[] are used; the local rows
       are treated as the contiguous range lo..hi. */
    lo = update[0];
    hi = update[N_update-1];
    nnz_local = 0;
    nnz_diag = 0;
    for (j = 0; j < n; ++j) {
        for (i = Astore->colbeg[j]; i < Astore->colend[j]; ++i) {
            irow = Astore->rowind[i];
            if ( irow >= lo && irow <= hi ) {
                if ( irow != j ) /* Exclude diagonal */
                    ++rowcnt[irow - lo];
                else ++nnz_diag; /* Count nonzero diagonal entries */
                ++nnz_local;
            }
        }
    }

    /* Add room for the logical diagonal zeros which are not counted
       in nnz_local. */
    nnz_local += (N_update - nnz_diag);

    /* Allocate storage for bindx[] and val[]. */
    if ( !(*val = (doublecomplex *) doublecomplexMalloc_dist(nnz_local+1)) )
        ABORT("Malloc fails for val[]");
    for (i = 0; i < N_update; ++i) (*val)[i] = zero; /* Initialize diagonal */
    if ( !(*bindx = (int_t *) SUPERLU_MALLOC((nnz_local+1) * sizeof(int_t))) )
        ABORT("Malloc fails for bindx[]");

    /* Set up row pointers.  rowcnt[] is recycled to hold the next free
       slot of every row for the fill-in pass below. */
    (*bindx)[0] = N_update + 1;
    for (j = 1; j <= N_update; ++j) {
        (*bindx)[j] = (*bindx)[j-1] + rowcnt[j-1];
        rowcnt[j-1] = (*bindx)[j-1];
    }

    /* One pass of original matrix A to fill in matrix entries. */
    for (j = 0; j < n; ++j) {
        for (i = Astore->colbeg[j]; i < Astore->colend[j]; ++i) {
            irow = Astore->rowind[i];
            if ( irow >= lo && irow <= hi ) {
                if ( irow == j ) /* Diagonal */
                    (*val)[irow - lo] = nzval[i];
                else {
                    irow -= lo;
                    k = rowcnt[irow];
                    (*bindx)[k] = j;
                    (*val)[k] = nzval[i];
                    ++rowcnt[irow];
                }
            }
        }
    }

    SUPERLU_FREE(rowcnt);
}

/*! \brief * *
 * Performs sparse matrix-vector multiplication.
 *   - val/bindx stores the distributed MSR matrix A
 *   - X is global
 *   - ax product is distributed the same way as A
 * 
*/ int pzgsmv_AXglobal(int_t m, int_t update[], doublecomplex val[], int_t bindx[], doublecomplex X[], doublecomplex ax[]) { int_t i, j, k; doublecomplex zero = {0.0, 0.0}; doublecomplex temp; if ( m <= 0 ) return 0; /* number of rows (local) */ for (i = 0; i < m; ++i) { ax[i] = zero; for (k = bindx[i]; k < bindx[i+1]; ++k) { j = bindx[k]; /* column index */ zz_mult(&temp, &val[k], &X[j]); z_add(&ax[i], &ax[i], &temp); } zz_mult(&temp, &val[i], &X[update[i]]); /* diagonal */ z_add(&ax[i], &ax[i], &temp); } return 0; } /* PZGSMV_AXglobal */ /* * Performs sparse matrix-vector multiplication. * - val/bindx stores the distributed MSR matrix A * - X is global * - ax product is distributed the same way as A */ int pzgsmv_AXglobal_abs(int_t m, int_t update[], doublecomplex val[], int_t bindx[], doublecomplex X[], double ax[]) { int_t i, j, k; if ( m <= 0 ) return 0; /* number of rows (local) */ for (i = 0; i < m; ++i) { ax[i] = 0.0; for (k = bindx[i]; k < bindx[i+1]; ++k) { j = bindx[k]; /* column index */ ax[i] += slud_z_abs1(&val[k]) * slud_z_abs1(&X[j]); } ax[i] += slud_z_abs1(&val[i]) * slud_z_abs1(&X[update[i]]); /* diagonal */ } return 0; } /* PZGSMV_AXglobal_ABS */ /* * Print the local MSR matrix */ static void zPrintMSRmatrix ( int m, /* Number of rows of the submatrix. */ doublecomplex val[], int_t bindx[], gridinfo_t *grid ) { int iam, nnzp1; if ( !m ) return; iam = grid->iam; nnzp1 = bindx[m]; printf("(%2d) MSR submatrix has %d rows -->\n", iam, m); PrintDoublecomplex("val", nnzp1, val); PrintInt10("bindx", nnzp1, bindx); } SuperLU_DIST_5.3.0/SRC/sp_colorder.c0000644013363400111340000001711413233431301015746 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief Permutes the columns of the original matrix * *
 * -- Distributed SuperLU routine (version 5.1.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * December 31, 2016 version 5.1.3
 * 
*/ #include "superlu_ddefs.h" int check_perm_dist(char *, int_t, int_t *); /*! \brief * *
 * Purpose
 * =======
 *
 * sp_colorder() permutes the columns of the original matrix. It performs
 * the following steps:
 *
 *    1. Apply column permutation perm_c[] to A's column pointers to form AC;
 *
 *    2. If options->Fact == DOFACT or options->Fact == SamePattern, then
 *       (1) Compute column elimination tree etree[] of AC'AC;
 *       (2) Post order etree[] to get a postordered elimination tree etree[],
 *           and a postorder permutation post[];
 *       (3) Apply post[] permutation to columns of AC;
 *       (4) Overwrite perm_c[] with the product perm_c * post.
 *
 * Arguments
 * =========
 *
 * options (input) superlu_dist_options_t*
 *         Specifies whether or not the elimination tree will be re-used.
 *         If options->Fact == DOFACT, this means first time factor A, 
 *         etree is computed and output.
 *         Otherwise, re-factor A, etree is input, unchanged on exit.
 *
 * A       (input) SuperMatrix*
 *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
 *         of the linear equations is A->nrow. Currently, the type of A can be:
 *         Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
 *         In the future, more general A can be handled.
 *
 * perm_c  (input/output) int*
 *	   Column permutation vector of size A->ncol, which defines the 
 *         permutation matrix Pc; perm_c[i] = j means column i of A is 
 *         in position j in A*Pc.
 *         If options->Fact == DOFACT, perm_c is both input and output.
 *         On output, it is changed according to a postorder of etree.
 *         Otherwise, perm_c is input.
 *         
 * etree   (input/output) int*
 *         Elimination tree of Pc*(A'+A)*Pc', dimension A->ncol.
 *         If options->Fact == DOFACT, etree is an output argument,
 *         otherwise it is an input argument.
 *         Note: etree is a vector of parent pointers for a forest whose
 *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
 *
 * AC      (output) SuperMatrix*
 *         The resulting matrix after applying the column permutation
 *         perm_c[] to matrix A. The type of AC can be:
 *         Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
 * 
*/ void sp_colorder(superlu_dist_options_t *options, SuperMatrix *A, int_t *perm_c, int_t *etree, SuperMatrix *AC) { NCformat *Astore; NCPformat *ACstore; int_t *iwork, *post; register int_t n, i; #if ( DEBUGlevel>=1 ) int iam; MPI_Comm_rank( MPI_COMM_WORLD, &iam ); CHECK_MALLOC(iam, "Enter sp_colorder()"); #endif n = A->ncol; /* Apply column permutation perm_c to A's column pointers so to obtain NCP format in AC = A*Pc. */ AC->Stype = SLU_NCP; AC->Dtype = A->Dtype; AC->Mtype = A->Mtype; AC->nrow = A->nrow; AC->ncol = A->ncol; Astore = A->Store; ACstore = AC->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) ); if ( !ACstore ) ABORT("SUPERLU_MALLOC fails for ACstore"); ACstore->nnz = Astore->nnz; ACstore->nzval = Astore->nzval; ACstore->rowind = Astore->rowind; ACstore->colbeg = (int_t*) SUPERLU_MALLOC(n*sizeof(int_t)); if ( !(ACstore->colbeg) ) ABORT("SUPERLU_MALLOC fails for ACstore->colbeg"); ACstore->colend = (int_t*) SUPERLU_MALLOC(n*sizeof(int_t)); if ( !(ACstore->colend) ) ABORT("SUPERLU_MALLOC fails for ACstore->colend"); #if ( DEBUGlevel>=3 ) if ( !iam ) { PrintInt10("pre_order:", n, perm_c); check_perm_dist("Initial perm_c", n, perm_c); } #endif for (i = 0; i < n; i++) { ACstore->colbeg[perm_c[i]] = Astore->colptr[i]; ACstore->colend[perm_c[i]] = Astore->colptr[i+1]; } if ( options->Fact == DOFACT || options->Fact == SamePattern ) { /* In this case, perm_r[] may be changed, etree(Pr*A + (Pr*A)') may be changed, so need to recompute etree. */ /* Factor A "from scratch" -- we also compute the etree, and * make perm_c consistent with the postorder of the etree. */ iwork = (int_t*) SUPERLU_MALLOC((n+1)*sizeof(int_t)); if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]"); if ( A->nrow != A->ncol /* Rectangular matrix */ || options->ColPerm == MMD_ATA ) { /* Compute the column etree of A*Pc'. */ sp_coletree_dist(ACstore->colbeg, ACstore->colend, ACstore->rowind, A->nrow, A->ncol, etree); } else { /* Compute the etree of Pc*(A'+A)*Pc'. 
*/ int_t *b_colptr, *b_rowind, bnz, j; int_t *c_colbeg, *c_colend; /* Form B = A + A'. */ at_plus_a_dist(n, Astore->nnz, Astore->colptr, Astore->rowind, &bnz, &b_colptr, &b_rowind); /* Form C = Pc*B*Pc'. */ c_colbeg = (int_t*) SUPERLU_MALLOC(n*sizeof(int_t)); c_colend = (int_t*) SUPERLU_MALLOC(n*sizeof(int_t)); if (!(c_colbeg) || !(c_colend) ) ABORT("SUPERLU_MALLOC fails for c_colbeg/c_colend"); for (i = 0; i < n; i++) { c_colbeg[perm_c[i]] = b_colptr[i]; c_colend[perm_c[i]] = b_colptr[i+1]; } for (j = 0; j < n; ++j) { for (i = c_colbeg[j]; i < c_colend[j]; ++i) { b_rowind[i] = perm_c[b_rowind[i]]; } } /* Compute etree of C. */ sp_symetree_dist(c_colbeg, c_colend, b_rowind, n, etree); SUPERLU_FREE(b_colptr); if ( bnz ) SUPERLU_FREE(b_rowind); SUPERLU_FREE(c_colbeg); SUPERLU_FREE(c_colend); } #if ( DEBUGlevel>=3 ) if ( !iam ) PrintInt10("etree:", n, etree); #endif /* Post order etree */ post = (int_t *) TreePostorder_dist(n, etree); /* for (i = 0; i < n+1; ++i) inv_post[post[i]] = i; iwork = post; */ /* Renumber etree in postorder */ for (i = 0; i < n; ++i) iwork[post[i]] = post[etree[i]]; for (i = 0; i < n; ++i) etree[i] = iwork[i]; #if ( DEBUGlevel>=3 ) if ( !iam ) PrintInt10("postorder etree:", n, etree); #endif /* Postmultiply A*Pc by post[] */ for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colbeg[i]; for (i = 0; i < n; ++i) ACstore->colbeg[i] = iwork[i]; for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colend[i]; for (i = 0; i < n; ++i) ACstore->colend[i] = iwork[i]; for (i = 0; i < n; ++i) iwork[i] = post[perm_c[i]]; /* product of perm_c and post */ for (i = 0; i < n; ++i) perm_c[i] = iwork[i]; #if ( DEBUGlevel>=3 ) if ( !iam ) { PrintInt10("Pc*post:", n, perm_c); check_perm_dist("final perm_c", n, perm_c); } #endif SUPERLU_FREE (post); SUPERLU_FREE (iwork); } /* end if options->Fact == DOFACT ... 
*/ #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ACstore, ACstore->colbeg, ACstore->colend */ CHECK_MALLOC(iam, "Exit sp_colorder()"); #endif } /* SP_COLORDER */ int check_perm_dist(char *what, int_t n, int_t *perm) { register int_t i; int_t *marker; marker = (int_t *) intCalloc_dist(n); for (i = 0; i < n; ++i) { if ( perm[i] >= n || marker[perm[i]] == 1 ) { printf("%s: Not a valid PERM[" IFMT "] = " IFMT "\n", what, i, perm[i]); ABORT("check_perm_dist"); } else { marker[perm[i]] = 1; } } SUPERLU_FREE(marker); return 0; } SuperLU_DIST_5.3.0/SRC/pdgsmv_AXglobal.c0000644013363400111340000002151413233431301016503 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs sparse matrix-vector multiplication * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include #include "superlu_ddefs.h" static void dcreate_msr_matrix(SuperMatrix *, int_t [], int_t, double **, int_t **); static void dPrintMSRmatrix(int, double [], int_t [], gridinfo_t *); int pdgsmv_AXglobal_setup ( SuperMatrix *A, /* Matrix A permuted by columns (input). The type of A can be: Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. */ Glu_persist_t *Glu_persist, /* input */ gridinfo_t *grid, /* input */ int_t *m, /* output */ int_t *update[], /* output */ double *val[], /* output */ int_t *bindx[], /* output */ int_t *mv_sup_to_proc /* output */ ) { int n; int input_option; int N_update; /* Number of variables updated on this process (output) */ int iam = grid->iam; int nprocs = grid->nprow * grid->npcol; int_t *xsup = Glu_persist->xsup; int_t *supno = Glu_persist->supno; int_t nsupers; int i, nsup, p, t1, t2, t3; /* Initialize the list of global indices. * NOTE: the list of global indices must be in ascending order. */ n = A->nrow; input_option = SUPER_LINEAR; nsupers = supno[n-1] + 1; #if ( DEBUGlevel>=2 ) if ( !iam ) { PrintInt10("xsup", supno[n-1]+1, xsup); PrintInt10("supno", n, supno); } #endif if ( input_option == SUPER_LINEAR ) { /* Block partitioning based on individual rows. */ /* Figure out mv_sup_to_proc[] on all processes. */ for (p = 0; p < nprocs; ++p) { t1 = n / nprocs; /* Number of rows */ t2 = n - t1 * nprocs; /* left-over, which will be assigned to the first t2 processes. */ if ( p >= t2 ) t2 += (p * t1); /* Starting row number */ else { /* First t2 processes will get one more row. */ ++t1; /* Number of rows. */ t2 = p * t1; /* Starting row. */ } /* Make sure the starting and ending rows are at the supernode boundaries. */ t3 = t2 + t1; /* Ending row. */ nsup = supno[t2]; if ( t2 > xsup[nsup] ) { /* Round up the starting row. */ t1 -= xsup[nsup+1] - t2; t2 = xsup[nsup+1]; } nsup = supno[t3]; if ( t3 > xsup[nsup] ) /* Round up the ending row. 
*/ t1 += xsup[nsup+1] - t3; t3 = t2 + t1 - 1; if ( t1 ) { for (i = supno[t2]; i <= supno[t3]; ++i) { mv_sup_to_proc[i] = p; #if ( DEBUGlevel>=3 ) if ( mv_sup_to_proc[i] == p-1 ) { fprintf(stderr, "mv_sup_to_proc conflicts at supno %d\n", i); exit(-1); } #endif } } if ( iam == p ) { N_update = t1; if ( N_update ) { if ( !(*update = intMalloc_dist(N_update)) ) ABORT("Malloc fails for update[]"); } for (i = 0; i < N_update; ++i) (*update)[i] = t2 + i; #if ( DEBUGlevel>=3 ) printf("(%2d) N_update = %4d\t" "supers %4d to %4d\trows %4d to %4d\n", iam, N_update, supno[t2], supno[t3], t2, t3); #endif } } /* for p ... */ } else if ( input_option == SUPER_BLOCK ) { /* Block partitioning based on individual supernodes. */ /* This may cause bad load balance, because the blocks are usually small in the beginning and large toward the end. */ t1 = nsupers / nprocs; t2 = nsupers - t1 * nprocs; /* left-over */ if ( iam >= t2 ) t2 += (iam * t1); else { ++t1; /* Number of blocks. */ t2 = iam * t1; /* Starting block. */ } N_update = xsup[t2+t1] - xsup[t2]; if ( !(*update = intMalloc_dist(N_update)) ) ABORT("Malloc fails for update[]"); for (i = 0; i < N_update; ++i) (*update)[i] = xsup[t2] + i; } /* Create an MSR matrix in val/bindx to be used by pdgsmv(). */ dcreate_msr_matrix(A, *update, N_update, val, bindx); #if ( DEBUGlevel>=2 ) PrintInt10("mv_sup_to_proc", nsupers, mv_sup_to_proc); dPrintMSRmatrix(N_update, *val, *bindx, grid); #endif *m = N_update; return 0; } /* PDGSMV_AXglobal_SETUP */ /*! \brief * *
 * Create the distributed modified sparse row (MSR) matrix: bindx/val.
 * For a submatrix of size m-by-n, the MSR arrays are as follows:
 *    bindx[0]      = m + 1
 *    bindx[0..m]   = pointer to start of each row
 *    bindx[ks..ke] = column indices of the off-diagonal nonzeros in row k,
 *                    where, ks = bindx[k], ke = bindx[k+1]-1
 *    val[k]        = A(k,k), k < m, diagonal elements
 *    val[m]        = not used
 *    val[ki]       = A(k, bindx[ki]), where ks <= ki <= ke
 * Both arrays are of length nnz + 1.
 * 
*/
static void dcreate_msr_matrix
(
 SuperMatrix *A,       /* Matrix A permuted by columns (input).
                          The type of A can be:
                          Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. */
 int_t update[],       /* input (local) */
 int_t N_update,       /* input (local) */
 double **val,         /* output */
 int_t **bindx         /* output */
 )
{
    /* All counters and indices are int_t: they receive values taken from
       int_t arrays (colbeg/colend/rowind/update) and would be truncated
       by a plain int whenever int_t is the wider type. */
    int_t hi, i, irow, j, k, lo, n, nnz_local, nnz_diag;
    NCPformat *Astore;
    double *nzval;
    int_t *rowcnt;
    double zero = 0.0;

    /* No local rows: *val and *bindx are left untouched. */
    if ( !N_update ) return;

    n = A->ncol;
    Astore = A->Store;
    nzval = Astore->nzval;

    /* One pass of original matrix A to count nonzeros of each row.
       Local rows are taken to be the contiguous range
       update[0]..update[N_update-1]. */
    if ( !(rowcnt = (int_t *) intCalloc_dist(N_update)) )
        ABORT("Malloc fails for rowcnt[]");
    lo = update[0];
    hi = update[N_update-1];
    nnz_local = 0;
    nnz_diag = 0;
    for (j = 0; j < n; ++j) {
        for (i = Astore->colbeg[j]; i < Astore->colend[j]; ++i) {
            irow = Astore->rowind[i];
            if ( irow >= lo && irow <= hi ) {
                if ( irow != j ) /* Exclude diagonal */
                    ++rowcnt[irow - lo];
                else ++nnz_diag; /* Count nonzero diagonal entries */
                ++nnz_local;
            }
        }
    }

    /* Add room for the logical diagonal zeros which are not counted
       in nnz_local. */
    nnz_local += (N_update - nnz_diag);

    /* Allocate storage for bindx[] and val[]. */
    if ( !(*val = (double *) doubleMalloc_dist(nnz_local+1)) )
        ABORT("Malloc fails for val[]");
    /* Initialize the diagonal, and also the unused slot val[N_update] so
       that no uninitialized value is ever read when the array is dumped.
       The slot exists because nnz_local >= N_update at this point. */
    for (i = 0; i <= N_update; ++i) (*val)[i] = zero;
    if ( !(*bindx = (int_t *) SUPERLU_MALLOC((nnz_local+1) * sizeof(int_t))) )
        ABORT("Malloc fails for bindx[]");

    /* Set up row pointers.  rowcnt[] is reused as the next free slot
       of each row for the fill pass below. */
    (*bindx)[0] = N_update + 1;
    for (j = 1; j <= N_update; ++j) {
        (*bindx)[j] = (*bindx)[j-1] + rowcnt[j-1];
        rowcnt[j-1] = (*bindx)[j-1];
    }

    /* One pass of original matrix A to fill in matrix entries. */
    for (j = 0; j < n; ++j) {
        for (i = Astore->colbeg[j]; i < Astore->colend[j]; ++i) {
            irow = Astore->rowind[i];
            if ( irow >= lo && irow <= hi ) {
                if ( irow == j ) /* Diagonal */
                    (*val)[irow - lo] = nzval[i];
                else {
                    irow -= lo;
                    k = rowcnt[irow];
                    (*bindx)[k] = j;
                    (*val)[k] = nzval[i];
                    ++rowcnt[irow];
                }
            }
        }
    }

    SUPERLU_FREE(rowcnt);
}

/*! \brief * *
 * Performs sparse matrix-vector multiplication.
 *   - val/bindx stores the distributed MSR matrix A
 *   - X is global
 *   - ax product is distributed the same way as A
 * 
*/ int pdgsmv_AXglobal(int_t m, int_t update[], double val[], int_t bindx[], double X[], double ax[]) { int_t i, j, k; if ( m <= 0 ) return 0; /* number of rows (local) */ for (i = 0; i < m; ++i) { ax[i] = 0.0; for (k = bindx[i]; k < bindx[i+1]; ++k) { j = bindx[k]; /* column index */ ax[i] += val[k] * X[j]; } ax[i] += val[i] * X[update[i]]; /* diagonal */ } return 0; } /* PDGSMV_AXglobal */ /* * Performs sparse matrix-vector multiplication. * - val/bindx stores the distributed MSR matrix A * - X is global * - ax product is distributed the same way as A */ int pdgsmv_AXglobal_abs(int_t m, int_t update[], double val[], int_t bindx[], double X[], double ax[]) { int_t i, j, k; if ( m <= 0 ) return 0; /* number of rows (local) */ for (i = 0; i < m; ++i) { ax[i] = 0.0; for (k = bindx[i]; k < bindx[i+1]; ++k) { j = bindx[k]; /* column index */ ax[i] += fabs(val[k]) * fabs(X[j]); } ax[i] += fabs(val[i]) * fabs(X[update[i]]); /* diagonal */ } return 0; } /* PDGSMV_AXglobal_ABS */ /* * Print the local MSR matrix */ static void dPrintMSRmatrix ( int m, /* Number of rows of the submatrix. */ double val[], int_t bindx[], gridinfo_t *grid ) { int iam, nnzp1; if ( !m ) return; iam = grid->iam; nnzp1 = bindx[m]; printf("(%2d) MSR submatrix has %d rows -->\n", iam, m); PrintDouble5("val", nnzp1, val); PrintInt10("bindx", nnzp1, bindx); } SuperLU_DIST_5.3.0/SRC/psymbfact_util.c0000644013363400111340000004164113233431301016462 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Utilities for parallel symbolic factorization routine * *
 * -- Distributed symbolic factorization auxiliary routine  (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley - July 2003
 * INRIA France - January 2004
 * Laura Grigori
 *
 * November 1, 2007
 * 
*/ #include "superlu_ddefs.h" #include "psymbfact.h" static void copy_mem_int(int_t howmany, int_t* old, int_t* new) { register int_t i; for (i = 0; i < howmany; i++) new[i] = old[i]; } /*! \brief Expand the existing storage to accommodate more fill-ins. */ /************************************************************************/ static int_t *expand /************************************************************************/ ( int_t prev_len, /* length used from previous call */ int_t min_new_len, /* minimum new length to allocate */ int_t *prev_mem, /* pointer to the previous memory */ int_t *p_new_len, /* length of the new memory allocated */ int_t len_tcopy_fbeg, /* size of the memory to be copied to new store starting from the beginning of the memory */ int_t len_tcopy_fend, /* size of the memory to be copied to new store, starting from the end of the memory */ psymbfact_stat_t *PS ) { float exp = 2.0; float alpha; int_t *new_mem; int_t new_len, tries, lword, extra, bytes_to_copy; alpha = exp; lword = sizeof(int_t); new_len = alpha * prev_len; if (min_new_len > 0 && new_len < min_new_len) new_len = min_new_len; new_mem = (void *) SUPERLU_MALLOC(new_len * lword); PS->allocMem += new_len * lword; if (new_mem) { if (len_tcopy_fbeg != 0) copy_mem_int(len_tcopy_fbeg, prev_mem, new_mem); if (len_tcopy_fend != 0) copy_mem_int(len_tcopy_fend, &(prev_mem[prev_len-len_tcopy_fend]), &(new_mem[new_len-len_tcopy_fend])); } *p_new_len = new_len; return new_mem; } /* EXPAND */ /*! \brief * *
 * Expand the data structures for L and U during the factorization.
 * Return value: SUCCES_RET - successful return
 *               ERROR_RET - error due to a memory allocation failure
 * 
*/
/************************************************************************/
int_t psymbfact_LUXpandMem
/************************************************************************/
(
 int_t iam,
 int_t n,           /* total number of columns */
 int_t vtxXp,       /* current vertex */
 int_t next,        /* number of elements currently in the factors */
 int_t min_new_len, /* minimum new length to allocate */
 int_t mem_type,    /* which type of memory to expand */
 int_t rout_type,   /* during which type of factorization */
 int_t free_prev_mem, /* =1 if prev_mem has to be freed */
 Pslu_freeable_t *Pslu_freeable,
 Llu_symbfact_t *Llu_symbfact, /* modified - global LU data structures */
 vtcsInfo_symbfact_t *VInfo,
 psymbfact_stat_t *PS
 )
{
  int_t *new_mem, *prev_mem, *xsub;
  /* size of the memory to be copied to new store starting from the
     beginning/end of the memory */
  int_t xsub_nextLvl;
  int_t exp, prev_xsub_nextLvl, vtxXp_lid;
  int_t *globToLoc, maxNvtcsPProc, nvtcs_loc;
  int_t fstVtx_nextLvl, fstVtx_nextLvl_lid, vtx_lid, i, j;
  int_t len_tcopy_fbeg, len_tcopy_fend, new_len, prev_len;

  exp = 2;
  globToLoc = Pslu_freeable->globToLoc;
  nvtcs_loc = VInfo->nvtcs_loc;
  maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc;
  fstVtx_nextLvl = VInfo->fstVtx_nextLvl;
  vtxXp_lid = LOCAL_IND( globToLoc[vtxXp] );
  len_tcopy_fbeg = next;
  if (fstVtx_nextLvl == n)
    fstVtx_nextLvl_lid = nvtcs_loc;
  else
    fstVtx_nextLvl_lid = LOCAL_IND( globToLoc[fstVtx_nextLvl] );

  /* Select the array to grow.  Any mem_type other than LSUB/USUB would
     leave prev_mem, prev_len, xsub and prev_xsub_nextLvl unset, so it
     is rejected here, as the sibling expansion routines do. */
  if ( mem_type == LSUB ) {
    prev_mem = Llu_symbfact->lsub;
    prev_len = Llu_symbfact->szLsub;
    xsub = Llu_symbfact->xlsub;
    if (rout_type == DOMAIN_SYMB)
      prev_xsub_nextLvl = xsub[vtxXp_lid+1];
    else
      prev_xsub_nextLvl = VInfo->xlsub_nextLvl;
  } else if ( mem_type == USUB ) {
    prev_mem = Llu_symbfact->usub;
    prev_len = Llu_symbfact->szUsub;
    xsub = Llu_symbfact->xusub;
    if (rout_type == DOMAIN_SYMB)
      prev_xsub_nextLvl = xsub[vtxXp_lid+1];
    else
      prev_xsub_nextLvl = VInfo->xusub_nextLvl;
  } else ABORT("Tries to expand nonexisting memory type.\n");

  /* Everything from prev_xsub_nextLvl to the end of the old buffer is
     carried over to the end of the new one. */
  len_tcopy_fend = prev_len - prev_xsub_nextLvl;

  /* Only DNS_UPSEPS drops the tail copy.  An earlier version also did so
     for DNS_CURSEP - bug corrected on Sept 1st, 2013. */
  if (rout_type == DNS_UPSEPS) {
    fstVtx_nextLvl = n;
    fstVtx_nextLvl_lid = nvtcs_loc;
    len_tcopy_fend = 0;
  }
#ifdef TEST_SYMB
  printf ("Pe[" IFMT "] LUXpand mem_t " IFMT " vtxXp " IFMT "\n",
          iam, mem_type, vtxXp);
#endif
  new_mem = expand (prev_len, min_new_len, prev_mem,
                    &new_len, len_tcopy_fbeg, len_tcopy_fend, PS);
  if ( !new_mem ) {
    fprintf(stderr, "Pe[" IFMT "] Can't exp MemType " IFMT ": prv_len " IFMT " min_new " IFMT " new_l " IFMT "\n",
            iam, mem_type, prev_len, min_new_len, new_len);
    return ERROR_RET;
  }

  xsub_nextLvl = new_len - len_tcopy_fend;

  /* reset xsub information pointing to A data */
  if (fstVtx_nextLvl != n || rout_type == DOMAIN_SYMB) {
    if (rout_type == DOMAIN_SYMB)
      vtx_lid = vtxXp_lid + 1;
    else {
      vtx_lid = fstVtx_nextLvl_lid + 1;
    }
    i = xsub_nextLvl + xsub[vtx_lid] - prev_xsub_nextLvl;
    for (; vtx_lid < nvtcs_loc; vtx_lid ++) {
      j = xsub[vtx_lid+1] - xsub[vtx_lid];
      xsub[vtx_lid] = i;
      i += j;
    }
    xsub[vtx_lid] = i;
  }

  if (free_prev_mem) {
    SUPERLU_FREE (prev_mem);
    /* NOTE(review): this subtracts zero, so allocMem is never reduced
       when the old buffer is released -- confirm whether
       prev_len * sizeof(int_t) was intended. */
    PS->allocMem -= 0;
  }

  if ( mem_type == LSUB ) {
    Llu_symbfact->lsub = new_mem;
    Llu_symbfact->szLsub = new_len;
    VInfo->xlsub_nextLvl = xsub_nextLvl;
  } else if ( mem_type == USUB ) {
    Llu_symbfact->usub = new_mem;
    Llu_symbfact->szUsub = new_len;
    VInfo->xusub_nextLvl = xsub_nextLvl;
  }

  Llu_symbfact->no_expand ++;
  return SUCCES_RET;
}

/*! \brief * *
 * Expand the data structures for L and U during the factorization.
 * Return value: SUCCES_RET - successful return
 *               ERROR_RET - error due to a memory alocation failure
 * 
*/ /************************************************************************/ int_t psymbfact_LUXpand /************************************************************************/ ( int_t iam, int_t n, /* total number of columns */ int_t fstVtxLvl_loc, /* first vertex in the level to update */ int_t vtxXp, /* current vertex */ int_t *p_next, /* number of elements currently in the factors */ int_t min_new_len, /* minimum new length to allocate */ int_t mem_type, /* which type of memory to expand */ int_t rout_type, /* during which type of factorization */ int_t free_prev_mem, /* =1 if free prev_mem memory */ Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* modified - global LU data structures */ vtcsInfo_symbfact_t *VInfo, psymbfact_stat_t *PS ) { int mem_error; int_t *new_mem, *prev_mem, *xsub, sz_prev_mem; /* size of the memory to be copied to new store starting from the beginning/end of the memory */ int_t exp, prev_xsub_nextLvl, vtxXp_lid, xsub_nextLvl; int_t *globToLoc, nvtcs_loc, maxNvtcsPProc; int_t fstVtx_nextLvl, fstVtx_nextLvl_lid; int_t i, j, k, vtx_lid, len_texp, nelts, nel; int_t fstVtxLvl_loc_lid, prev_len, next; exp = 2; next = *p_next; globToLoc = Pslu_freeable->globToLoc; nvtcs_loc = VInfo->nvtcs_loc; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; fstVtx_nextLvl = VInfo->fstVtx_nextLvl; vtxXp_lid = LOCAL_IND( globToLoc[vtxXp] ); if (fstVtx_nextLvl == n) fstVtx_nextLvl_lid = VInfo->nvtcs_loc; else fstVtx_nextLvl_lid = LOCAL_IND( globToLoc[fstVtx_nextLvl] ); if (rout_type == RL_SYMB) fstVtxLvl_loc_lid = LOCAL_IND( globToLoc[fstVtxLvl_loc] ); if ( mem_type == LSUB ) { xsub = Llu_symbfact->xlsub; prev_mem = Llu_symbfact->lsub; prev_xsub_nextLvl = VInfo->xlsub_nextLvl; sz_prev_mem = Llu_symbfact->szLsub; } else if ( mem_type == USUB ) { xsub = Llu_symbfact->xusub; prev_mem = Llu_symbfact->usub; prev_xsub_nextLvl = VInfo->xusub_nextLvl; sz_prev_mem = Llu_symbfact->szUsub; } #ifdef TEST_SYMB printf ("Pe[%d] Expand LU mem_t %d vtxXp %d\n", iam, 
mem_type, vtxXp); #endif /* Try to expand the size of xsub in the existing memory */ if (rout_type == RL_SYMB) { len_texp = 0; for (vtx_lid = fstVtxLvl_loc_lid; vtx_lid < fstVtx_nextLvl_lid; vtx_lid ++) { nelts = xsub[vtx_lid+1] - xsub[vtx_lid]; if (nelts == 0) nelts = 1; nelts = 2 * nelts; if (nelts > Llu_symbfact->cntelt_vtcs[vtx_lid]) nelts = Llu_symbfact->cntelt_vtcs[vtx_lid]; len_texp += nelts; } /* len_texp = 2 * (xsub[fstVtx_nextLvl_lid] - xsub[fstVtxLvl_loc_lid]); */ prev_len = xsub[fstVtxLvl_loc_lid]; next = prev_len; } else { nelts = xsub[vtxXp_lid+1] - xsub[vtxXp_lid]; if (nelts == 0) nelts = 1; len_texp = xsub[fstVtx_nextLvl_lid] - xsub[vtxXp_lid+1] + 4 * nelts; prev_len = xsub[vtxXp_lid]; } if (prev_len + len_texp >= prev_xsub_nextLvl) { /* not enough memory */ min_new_len = prev_len + len_texp + (sz_prev_mem - prev_xsub_nextLvl); if (mem_error = psymbfact_LUXpandMem (iam, n, vtxXp, next, min_new_len, mem_type, rout_type, 0, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); if ( mem_type == LSUB ) new_mem = Llu_symbfact->lsub; else if ( mem_type == USUB ) new_mem = Llu_symbfact->usub; } else new_mem = prev_mem; if (mem_type == LSUB && PS->estimLSz < (prev_len + len_texp)) PS->estimLSz = prev_len + len_texp; if (mem_type == USUB && PS->estimUSz < (prev_len + len_texp)) PS->estimUSz = prev_len; /* expand the space */ if (rout_type == LL_SYMB) { i = xsub[vtxXp_lid] + len_texp; vtx_lid = fstVtx_nextLvl_lid - 1; for (; vtx_lid > vtxXp_lid; vtx_lid --) { j = xsub[vtx_lid]; nel = 0; while (j < xsub[vtx_lid+1] && prev_mem[j] != EMPTY) { nel ++; j ++; } j = xsub[vtx_lid] + nel - 1; k = i - (xsub[vtx_lid+1] - xsub[vtx_lid]) + nel - 1; if (k+1 < i) new_mem[k+1] = EMPTY; while (j >= xsub[vtx_lid]) { new_mem[k] = prev_mem[j]; k--; j--; } k = i; i -= (xsub[vtx_lid+1] - xsub[vtx_lid]); xsub[vtx_lid+1] = k; } xsub[vtx_lid+1] = i; k = *p_next; if (k < xsub[vtx_lid+1]) new_mem[k] = EMPTY; } if (rout_type == RL_SYMB) { *p_next -= xsub[vtxXp_lid]; i = 
xsub[fstVtxLvl_loc_lid] + len_texp; vtx_lid = fstVtx_nextLvl_lid - 1; for (; vtx_lid >= fstVtxLvl_loc_lid; vtx_lid --) { nelts = 2 * (xsub[vtx_lid+1] - xsub[vtx_lid]); if (nelts == 0) nelts = 2; if (nelts > Llu_symbfact->cntelt_vtcs[vtx_lid]) nelts = Llu_symbfact->cntelt_vtcs[vtx_lid]; j = xsub[vtx_lid]; nel = 0; while (j < xsub[vtx_lid+1] && prev_mem[j] != EMPTY) { nel ++; j ++; } j = xsub[vtx_lid] + nel - 1; k = i - nelts + nel - 1; if (k+1 < i) new_mem[k+1] = EMPTY; while (j >= xsub[vtx_lid]) { new_mem[k] = prev_mem[j]; k--; j--; } k = i; i -= nelts; xsub[vtx_lid+1] = k; } *p_next += xsub[vtxXp_lid]; } if (free_prev_mem && new_mem != prev_mem) SUPERLU_FREE (prev_mem); Llu_symbfact->no_expcp ++; return SUCCES_RET; } /*! \brief * *
 * Expand the data structures for L and U during the factorization.
 * Return value: SUCCES_RET - successful return
 *               otherwise  - the nonzero error code returned by psymbfact_LUXpandMem
 * 
*/ /************************************************************************/ int_t psymbfact_LUXpand_RL /************************************************************************/ ( int_t iam, int_t n, /* total number of columns */ int_t vtxXp, /* current vertex */ int_t next, /* number of elements currently in the factors */ int_t len_texp, /* length to expand */ int_t mem_type, /* which type of memory to expand */ Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, /* modified - global LU data structures */ vtcsInfo_symbfact_t *VInfo, psymbfact_stat_t *PS ) { int_t *new_mem, *prev_mem, *xsub, mem_error, sz_prev_mem; /* size of the memory to be copied to new store starting from the beginning/end of the memory */ int_t exp, prev_xsub_nextLvl, vtxXp_lid, xsub_nextLvl; int_t *globToLoc, nvtcs_loc, maxNvtcsPProc; int_t fstVtx_nextLvl, fstVtx_nextLvl_lid; int_t i, j, k, vtx_lid, nel; int_t fstVtxLvl_loc_lid, prev_len, min_new_len; #ifdef TEST_SYMB printf ("Pe[%d] Expand LU_RL mem_t %d vtxXp %d\n", iam, mem_type, vtxXp); #endif globToLoc = Pslu_freeable->globToLoc; nvtcs_loc = VInfo->nvtcs_loc; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; fstVtx_nextLvl = VInfo->fstVtx_nextLvl; vtxXp_lid = LOCAL_IND( globToLoc[vtxXp] ); if (fstVtx_nextLvl == n) fstVtx_nextLvl_lid = VInfo->nvtcs_loc; else fstVtx_nextLvl_lid = LOCAL_IND( globToLoc[fstVtx_nextLvl] ); if ( mem_type == LSUB ) { xsub = Llu_symbfact->xlsub; prev_mem = Llu_symbfact->lsub; prev_xsub_nextLvl = VInfo->xlsub_nextLvl; sz_prev_mem = Llu_symbfact->szLsub; } else if ( mem_type == USUB ) { xsub = Llu_symbfact->xusub; prev_mem = Llu_symbfact->usub; prev_xsub_nextLvl = VInfo->xusub_nextLvl; sz_prev_mem = Llu_symbfact->szUsub; } else ABORT("Tries to expand nonexisting memory type.\n"); /* Try to expand the size of xsub in the existing memory */ prev_len = xsub[vtxXp_lid]; if (prev_len + len_texp >= prev_xsub_nextLvl) { /* not enough memory */ min_new_len = prev_len + len_texp + (sz_prev_mem - prev_xsub_nextLvl); 
if (mem_error = psymbfact_LUXpandMem (iam, n, vtxXp, next, min_new_len, mem_type, RL_SYMB, 0, Pslu_freeable, Llu_symbfact, VInfo, PS)) return (mem_error); if ( mem_type == LSUB ) new_mem = Llu_symbfact->lsub; else if ( mem_type == USUB ) new_mem = Llu_symbfact->usub; } else new_mem = prev_mem; /* expand the space */ if (mem_type == LSUB && PS->estimLSz < (prev_len + len_texp)) PS->estimLSz = prev_len + len_texp; if (mem_type == USUB && PS->estimUSz < (prev_len + len_texp)) PS->estimUSz = prev_len; i = xsub[vtxXp_lid] + len_texp; vtx_lid = fstVtx_nextLvl_lid - 1; for (; vtx_lid > vtxXp_lid; vtx_lid --) { j = xsub[vtx_lid]; nel = 0; while (j < xsub[vtx_lid+1] && prev_mem[j] != EMPTY) { nel ++; j++; } j = xsub[vtx_lid] + nel - 1; k = i - Llu_symbfact->cntelt_vtcs[vtx_lid] + nel - 1; if (k+1 < i) new_mem[k+1] = EMPTY; while (j >= xsub[vtx_lid]) { new_mem[k] = prev_mem[j]; k--; j--; } k = i; i -= Llu_symbfact->cntelt_vtcs[vtx_lid]; xsub[vtx_lid+1] = k; } xsub[vtx_lid+1] = i; k = next; if (k < xsub[vtx_lid+1]) new_mem[k] = EMPTY; if (new_mem != prev_mem) SUPERLU_FREE (prev_mem); Llu_symbfact->no_expcp ++; return SUCCES_RET; } /*! \brief * *
 * Expand the data structures for L and U pruned during the factorization.
 * Return value: SUCCES_RET - successful return
 *               ERROR_RET - error when run out of space
 * 
*/ /************************************************************************/ int_t psymbfact_prLUXpand /************************************************************************/ ( int_t iam, int_t min_new_len, /* minimum new length to allocate */ #if 0 MemType mem_type, /* which type of memory to expand */ #else /* Sherry */ int mem_type, /* which type of memory to expand */ #endif Llu_symbfact_t *Llu_symbfact, /* modified L/U pruned structures */ psymbfact_stat_t *PS ) { int_t *prev_mem, *new_mem; int_t prev_len, new_len, len_tcopy_fbeg; if ( mem_type == LSUB_PR ) { prev_len = Llu_symbfact->szLsubPr; prev_mem = Llu_symbfact->lsubPr; len_tcopy_fbeg = Llu_symbfact->indLsubPr; } else if ( mem_type == USUB_PR ) { prev_len = Llu_symbfact->szUsubPr; prev_mem = Llu_symbfact->usubPr; len_tcopy_fbeg = Llu_symbfact->indUsubPr; } else ABORT("Tries to expand nonexisting memory type.\n"); #ifdef TEST_SYMB printf ("Pe[%d] Expand prmem prev_len %d min_new_l %d len_tfbeg %d\n", iam, prev_len, min_new_len, len_tcopy_fbeg); #endif new_mem = expand (prev_len, min_new_len, prev_mem, &new_len, len_tcopy_fbeg, 0, PS); if ( !new_mem ) { fprintf(stderr, "Can't expand MemType %d: \n", mem_type); return (ERROR_RET); } Llu_symbfact->no_expand_pr ++; if ( mem_type == LSUB_PR ) { Llu_symbfact->lsubPr = new_mem; Llu_symbfact->szLsubPr = new_len; } else if ( mem_type == USUB_PR ) { Llu_symbfact->usubPr = new_mem; Llu_symbfact->szUsubPr = new_len; } else ABORT("Tries to expand nonexisting memory type.\n"); SUPERLU_FREE (prev_mem); return SUCCES_RET; } SuperLU_DIST_5.3.0/SRC/pdgstrs_lsum.c0000644013363400111340000002760113233431301016163 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief Perform local block modifications: lsum[i] -= L_i,k * X[k] * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 * 
*/ #include "superlu_ddefs.h" #define ISEND_IRECV /* * Function prototypes */ #ifdef _CRAY fortran void STRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, double*, double*, int*, double*, int*); fortran void SGEMM(_fcd, _fcd, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   Perform local block modifications: lsum[i] -= L_i,k * X[k].
 * 
*/
/*
 * dlsum_fmod
 *
 * Walks the nlb blocks of L in block column k whose index/value data start
 * at lsub[lptr] / lusup[luptr].  For each block (global block row ik):
 *   1. rtemp = L(ik,k) * xk is formed with GEMM (beta = 0, so rtemp is
 *      overwritten) and subtracted from the lsum segment of block row ik.
 *   2. fmod[lk] is decremented.  When it reaches zero:
 *      - if this process is not p = PNUM(myrow, PCOL(ik)), the lsum segment
 *        (including its LSUM_H header words) is sent to p with tag LSUM;
 *      - otherwise the segment is added into x.  If additionally
 *        frecv[lk] == 0, X[ik] is solved with a unit lower-triangular
 *        solve against the diagonal block, sent with tag Xk to every
 *        process row p for which fsendx_plist[lk][p] != EMPTY, and the
 *        routine recurses on block column ik.
 *
 * Side effects: lsum, x, rtemp, fmod, stat->ops[SOLVE], send_req[] and
 * Llu->SolveMsgSent are modified.
 *
 * NOTE(review): RHS_ITERATE, LSUM_BLK, X_BLK and SuperSize are macros
 * defined elsewhere; they presumably expand to expressions using the local
 * names nrhs, ilsum and xsup -- confirm before renaming any of those.
 * NOTE(review): send_req[] is indexed by Llu->SolveMsgSent++ with no bound
 * check visible here; the caller must size it accordingly.
 */
void dlsum_fmod
/************************************************************************/
(
 double *lsum,    /* Sum of local modifications.                        */
 double *x,       /* X array (local)                                    */
 double *xk,      /* X[k].                                              */
 double *rtemp,   /* Result of full matrix-vector multiply.             */
 int    nrhs,     /* Number of right-hand sides.                        */
 int    knsupc,   /* Size of supernode k.                               */
 int_t  k,        /* The k-th component of X.                           */
 int_t  *fmod,    /* Modification count for L-solve.                    */
 int_t  nlb,      /* Number of L blocks.                                */
 int_t  lptr,     /* Starting position in lsub[*].                      */
 int_t  luptr,    /* Starting position in lusup[*].                     */
 int_t  *xsup,
 gridinfo_t *grid,
 LocalLU_t *Llu,
 MPI_Request send_req[], /* input/output */
 SuperLUStat_t *stat
 )
{
    double alpha = 1.0, beta = 0.0;
    double *lusup, *lusup1;
    double *dest;
    int    iam, iknsupc, myrow, nbrow, nsupr, nsupr1, p, pi;
    int_t  i, ii, ik, il, ikcol, irow, j, lb, lk, rel;
    int_t  *lsub, *lsub1, nlb1, lptr1, luptr1;
    int_t  *ilsum = Llu->ilsum; /* Starting position of each supernode in lsum. */
    int_t  *frecv = Llu->frecv;
    int_t  **fsendx_plist = Llu->fsendx_plist;
    MPI_Status status; /* declared but not referenced in this routine */
    int test_flag;     /* declared but not referenced in this routine */

    iam = grid->iam;
    myrow = MYROW( iam, grid );
    lk = LBj( k, grid ); /* Local block number, column-wise. */
    lsub = Llu->Lrowind_bc_ptr[lk];
    lusup = Llu->Lnzval_bc_ptr[lk];
    nsupr = lsub[1]; /* used as the leading dimension of lusup in GEMM */

    for (lb = 0; lb < nlb; ++lb) {
	ik = lsub[lptr]; /* Global block number, row-wise. */
	nbrow = lsub[lptr+1];
	/* rtemp := L(ik,k) * xk  (nbrow x nrhs result, overwritten). */
#ifdef _CRAY
	SGEMM( ftcs2, ftcs2, &nbrow, &nrhs, &knsupc,
	      &alpha, &lusup[luptr], &nsupr, xk,
	      &knsupc, &beta, rtemp, &nbrow );
#elif defined (USE_VENDOR_BLAS)
	dgemm_( "N", "N", &nbrow, &nrhs, &knsupc,
	       &alpha, &lusup[luptr], &nsupr, xk,
	       &knsupc, &beta, rtemp, &nbrow, 1, 1 );
#else
	dgemm_( "N", "N", &nbrow, &nrhs, &knsupc,
	       &alpha, &lusup[luptr], &nsupr, xk,
	       &knsupc, &beta, rtemp, &nbrow );
#endif
	stat->ops[SOLVE] += 2 * nbrow * nrhs * knsupc + nbrow * nrhs;

	lk = LBi( ik, grid ); /* Local block number, row-wise. */
	iknsupc = SuperSize( ik );
	il = LSUM_BLK( lk );
	dest = &lsum[il];
	lptr += LB_DESCRIPTOR; /* step over the (block number, nbrow) pair */
	rel = xsup[ik]; /* Global row index of block ik. */
	/* Scatter-subtract: the block stores only nbrow rows, each mapped
	   to its position inside supernode ik via the row index in lsub. */
	for (i = 0; i < nbrow; ++i) {
	    irow = lsub[lptr++] - rel; /* Relative row. */
	    RHS_ITERATE(j)
		dest[irow + j*iknsupc] -= rtemp[i + j*nbrow];
	}
	luptr += nbrow; /* advance to the next block's values */

	if ( (--fmod[lk])==0 ) { /* Local accumulation done. */
	    ikcol = PCOL( ik, grid );
	    p = PNUM( myrow, ikcol, grid );
	    if ( iam != p ) {
		/* Ship the header plus iknsupc*nrhs values to process p. */
#ifdef ISEND_IRECV
		MPI_Isend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			   MPI_DOUBLE, p, LSUM, grid->comm,
			   &send_req[Llu->SolveMsgSent++] );
#else
#ifdef BSEND
		MPI_Bsend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			   MPI_DOUBLE, p, LSUM, grid->comm );
#else
		MPI_Send( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			  MPI_DOUBLE, p, LSUM, grid->comm );
#endif
#endif
#if ( DEBUGlevel>=2 )
		printf("(%2d) Sent LSUM[%2.0f], size %2d, to P %2d\n",
		       iam, lsum[il-LSUM_H], iknsupc*nrhs+LSUM_H, p);
#endif
	    } else { /* Diagonal process: X[i] += lsum[i]. */
		ii = X_BLK( lk );
		RHS_ITERATE(j)
		    for (i = 0; i < iknsupc; ++i)
			x[i + ii + j*iknsupc] += lsum[i + il + j*iknsupc];
		if ( frecv[lk]==0 ) { /* Becomes a leaf node. */
		    fmod[lk] = -1; /* Do not solve X[k] in the future. */
		    /* From here on lk is the column-wise local number of
		       block ik; it is reset at the top of the next lb
		       iteration. */
		    lk = LBj( ik, grid );/* Local block number, column-wise. */
		    lsub1 = Llu->Lrowind_bc_ptr[lk];
		    lusup1 = Llu->Lnzval_bc_ptr[lk];
		    nsupr1 = lsub1[1];
		    /* In-place solve of x[ii..] against the unit
		       lower-triangular diagonal block. */
#ifdef _CRAY
		    STRSM(ftcs1, ftcs1, ftcs2, ftcs3, &iknsupc, &nrhs, &alpha,
			  lusup1, &nsupr1, &x[ii], &iknsupc);
#elif defined (USE_VENDOR_BLAS)
		    dtrsm_("L", "L", "N", "U", &iknsupc, &nrhs, &alpha,
			   lusup1, &nsupr1, &x[ii], &iknsupc, 1, 1, 1, 1);
#else
		    dtrsm_("L", "L", "N", "U", &iknsupc, &nrhs, &alpha,
			   lusup1, &nsupr1, &x[ii], &iknsupc);
#endif
		    stat->ops[SOLVE] += iknsupc * (iknsupc - 1) * nrhs;
#if ( DEBUGlevel>=2 )
		    /* NOTE(review): ik is int_t but printed with %d --
		       confirm int_t is int in builds that enable this. */
		    printf("(%2d) Solve X[%2d]\n", iam, ik);
#endif

		    /*
		     * Send Xk to process column Pc[k].
		     */
		    for (p = 0; p < grid->nprow; ++p) {
			if ( fsendx_plist[lk][p] != EMPTY ) {
			    pi = PNUM( p, ikcol, grid );
#ifdef ISEND_IRECV
			    MPI_Isend( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				       MPI_DOUBLE, pi, Xk, grid->comm,
				       &send_req[Llu->SolveMsgSent++] );
#else
#ifdef BSEND
			    MPI_Bsend( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				       MPI_DOUBLE, pi, Xk, grid->comm );
#else
			    MPI_Send( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				      MPI_DOUBLE, pi, Xk, grid->comm );
#endif
#endif
#if ( DEBUGlevel>=2 )
			    printf("(%2d) Sent X[%2.0f] to P %2d\n",
				   iam, x[ii-XK_H], pi);
#endif
			}
		    }

		    /*
		     * Perform local block modifications.
		     * The recursion reuses rtemp; its contents for the
		     * current block were already consumed above.
		     */
		    nlb1 = lsub1[0] - 1;
		    lptr1 = BC_HEADER + LB_DESCRIPTOR + iknsupc;
		    luptr1 = iknsupc; /* Skip diagonal block L(I,I). */

		    dlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, iknsupc, ik,
			       fmod, nlb1, lptr1, luptr1, xsup,
			       grid, Llu, send_req, stat);
		} /* if frecv[lk] == 0 */
	    } /* if iam == p */
	} /* if fmod[lk] == 0 */

    } /* for lb ... */

} /* dLSUM_FMOD */


/************************************************************************/
void dlsum_bmod
/************************************************************************/
(
 double *lsum,        /* Sum of local modifications.                    */
 double *x,           /* X array (local).                               */
 double *xk,          /* X[k].                                          */
 int    nrhs,         /* Number of right-hand sides.                    */
 int_t  k,            /* The k-th component of X.                       */
 int_t  *bmod,        /* Modification counts, indexed by local block row
			 and decremented here.                          */
 int_t  *Urbs,        /* Number of row blocks in each block column of U.*/
 Ucb_indptr_t **Ucb_indptr,/* Vertical linked list pointing to Uindex[].*/
 int_t  **Ucb_valptr, /* Vertical linked list pointing to Unzval[].     */
 int_t  *xsup,
 gridinfo_t *grid,
 LocalLU_t *Llu,
 MPI_Request send_req[], /* input/output */
 SuperLUStat_t *stat
 )
{
/*
 * Purpose
 * =======
 *   Perform local block modifications: lsum[i] -= U_i,k * X[k].
 *
 *   For each of the Urbs[lk] blocks of U in local block column lk, every
 *   column segment whose first-nonzero row fnz lies below iklrow is
 *   applied to the lsum segment of that block row, for every right-hand
 *   side.  bmod[ik] is then decremented; when it reaches zero the segment
 *   is either sent (tag LSUM) to the process p = PNUM(myrow, PCOL(gik)) or,
 *   on that process itself, added into x.  In the latter case, if
 *   brecv[ik] is zero, X[gik] is solved with a non-unit upper-triangular
 *   solve, sent with tag Xk to every process row p for which
 *   bsendx_plist[lk1][p] != EMPTY, and the routine recurses on block
 *   column gik when Urbs[lk1] is nonzero.
 *
 *   NOTE(review): RHS_ITERATE, LSUM_BLK, X_BLK, SuperSize and FstBlockC
 *   are macros defined elsewhere; they presumably rely on the local names
 *   nrhs, ilsum and xsup -- confirm before renaming any of those.
 */
    double alpha = 1.0;
    int    iam, iknsupc, knsupc, myrow, nsupr, p, pi;
    int_t  fnz, gik, gikcol, i, ii, ik, ikfrow, iklrow, il, irow,
           j, jj, lk, lk1, nub, ub, uptr;
    int_t  *usub;
    double *uval, *dest, *y;
    int_t  *lsub;
    double *lusup;
    int_t  *ilsum = Llu->ilsum; /* Starting position of each supernode in lsum.   */
    int_t  *brecv = Llu->brecv;
    int_t  **bsendx_plist = Llu->bsendx_plist;
    MPI_Status status; /* declared but not referenced in this routine */
    int test_flag;     /* declared but not referenced in this routine */

    iam = grid->iam;
    myrow = MYROW( iam, grid );
    knsupc = SuperSize( k );
    lk = LBj( k, grid ); /* Local block number, column-wise. */
    nub = Urbs[lk];      /* Number of U blocks in block column lk */

    for (ub = 0; ub < nub; ++ub) {
	ik = Ucb_indptr[lk][ub].lbnum; /* Local block number, row-wise. */
	usub = Llu->Ufstnz_br_ptr[ik];
	uval = Llu->Unzval_br_ptr[ik];
	i = Ucb_indptr[lk][ub].indpos; /* Start of the block in usub[]. */
	i += UB_DESCRIPTOR; /* i now indexes the per-column fnz entries */
	il = LSUM_BLK( ik );
	gik = ik * grid->nprow + myrow;/* Global block number, row-wise. */
	iknsupc = SuperSize( gik );
	ikfrow = FstBlockC( gik );
	iklrow = FstBlockC( gik+1 );

	RHS_ITERATE(j) {
	    dest = &lsum[il + j*iknsupc];
	    y = &xk[j*knsupc];
	    /* uptr is reset for every right-hand side because the same
	       packed values are traversed again. */
	    uptr = Ucb_valptr[lk][ub]; /* Start of the block in uval[]. */
	    for (jj = 0; jj < knsupc; ++jj) {
		fnz = usub[i + jj];
		if ( fnz < iklrow ) { /* Nonzero segment. */
		    /* AXPY */
		    for (irow = fnz; irow < iklrow; ++irow)
			dest[irow - ikfrow] -= uval[uptr++] * y[jj];
		    stat->ops[SOLVE] += 2 * (iklrow - fnz);
		}
	    } /* for jj ... */
	}

	if ( (--bmod[ik]) == 0 ) { /* Local accumulation done. */
	    gikcol = PCOL( gik, grid );
	    p = PNUM( myrow, gikcol, grid );
	    if ( iam != p ) {
		/* Ship the header plus iknsupc*nrhs values to process p. */
#ifdef ISEND_IRECV
		MPI_Isend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			   MPI_DOUBLE, p, LSUM, grid->comm,
			   &send_req[Llu->SolveMsgSent++] );
#else
#ifdef BSEND
		MPI_Bsend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			   MPI_DOUBLE, p, LSUM, grid->comm );
#else
		MPI_Send( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H,
			  MPI_DOUBLE, p, LSUM, grid->comm );
#endif
#endif
#if ( DEBUGlevel>=2 )
		printf("(%2d) Sent LSUM[%2.0f], size %2d, to P %2d\n",
		       iam, lsum[il-LSUM_H], iknsupc*nrhs+LSUM_H, p);
#endif
	    } else { /* Diagonal process: X[i] += lsum[i]. */
		ii = X_BLK( ik );
		dest = &x[ii];
		RHS_ITERATE(j)
		    for (i = 0; i < iknsupc; ++i)
			dest[i + j*iknsupc] += lsum[i + il + j*iknsupc];
		if ( !brecv[ik] ) { /* Becomes a leaf node. */
		    bmod[ik] = -1; /* Do not solve X[k] in the future. */
		    lk1 = LBj( gik, grid ); /* Local block number. */
		    lsub = Llu->Lrowind_bc_ptr[lk1];
		    lusup = Llu->Lnzval_bc_ptr[lk1];
		    nsupr = lsub[1];
		    /* In-place solve of x[ii..] against the non-unit
		       upper triangle of the block read from Lnzval_bc_ptr. */
#ifdef _CRAY
		    STRSM(ftcs1, ftcs3, ftcs2, ftcs2, &iknsupc, &nrhs, &alpha,
			  lusup, &nsupr, &x[ii], &iknsupc);
#elif defined (USE_VENDOR_BLAS)
		    dtrsm_("L", "U", "N", "N", &iknsupc, &nrhs, &alpha,
			   lusup, &nsupr, &x[ii], &iknsupc, 1, 1, 1, 1);
#else
		    dtrsm_("L", "U", "N", "N", &iknsupc, &nrhs, &alpha,
			   lusup, &nsupr, &x[ii], &iknsupc);
#endif
		    stat->ops[SOLVE] += iknsupc * (iknsupc + 1) * nrhs;
#if ( DEBUGlevel>=2 )
		    /* NOTE(review): gik is int_t but printed with %d --
		       confirm int_t is int in builds that enable this. */
		    printf("(%2d) Solve X[%2d]\n", iam, gik);
#endif

		    /*
		     * Send Xk to process column Pc[k].
		     */
		    for (p = 0; p < grid->nprow; ++p) {
			if ( bsendx_plist[lk1][p] != EMPTY ) {
			    pi = PNUM( p, gikcol, grid );
#ifdef ISEND_IRECV
			    MPI_Isend( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				       MPI_DOUBLE, pi, Xk, grid->comm,
				       &send_req[Llu->SolveMsgSent++] );
#else
#ifdef BSEND
			    MPI_Bsend( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				       MPI_DOUBLE, pi, Xk, grid->comm );
#else
			    MPI_Send( &x[ii - XK_H], iknsupc * nrhs + XK_H,
				      MPI_DOUBLE, pi, Xk, grid->comm );
#endif
#endif
#if ( DEBUGlevel>=2 )
			    printf("(%2d) Sent X[%2.0f] to P %2d\n",
				   iam, x[ii-XK_H], pi);
#endif
			}
		    }

		    /*
		     * Perform local block modifications.
		     * Recurse only when block column lk1 has U blocks here.
		     */
		    if ( Urbs[lk1] )
			dlsum_bmod(lsum, x, &x[ii], nrhs, gik, bmod, Urbs,
				   Ucb_indptr, Ucb_valptr, xsup, grid, Llu,
				   send_req, stat);
		} /* if brecv[ik] == 0 */
	    }
	} /* if bmod[ik] == 0 */

    } /* for ub ... */

} /* dlSUM_BMOD */

SuperLU_DIST_5.3.0/SRC/zSchCompUdt-2Ddynamic.c0000644013363400111340000006037613233431301017456 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief This file contains the main loop of pdgstrf which involves rank k * update of the Schur complement. * Uses 2D partitioning for the scatter phase. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 * Modified: September 14, 2017
 *   - First gather U-panel, then depending on "ldu" (excluding leading zeros), 
 *     gather only the trailing columns of the L-panel that correspond to the
 *     nonzero U-rows.
 *   - Padding zeros for nice dimensions of GEMM.
 *
 */

#define SCHEDULE_STRATEGY guided 

/* 
 * Buffers:
 *     [ lookAhead_L_buff | Remain_L_buff ] : stores the gathered L-panel
 *                                            (A matrix in C := A*B )
 *     bigU : stores the U-panel (B matrix in C := A*B)
 *     bigV : stores the block GEMM result (C matrix in C := A*B)
 */

if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */
    int cum_nrow = 0; /* cumulative number of nonzero rows in L(:,k) */
    int temp_nbrow;   /* nonzero rows in current block L(i,k) */
    lptr  = lptr0;
    luptr = luptr0;
    int Lnbrow, Rnbrow; /* number of nonzero rows in look-ahead window,
			   and remaining part.  */

    /*******************************************************************
     * Separating L blocks into the top part within look-ahead window
     * and the remaining ones.
     *******************************************************************/

     int lookAheadBlk=0, RemainBlk=0;

     tt_start = SuperLU_timer_();

     /* Sherry -- can this loop be threaded?? */
     /* Loop through all blocks in L(:,k) to set up pointers to the start 
      * of each block in the data arrays.
      *   - lookAheadFullRow[i] := number of nonzero rows from block 0 to i
      *   - lookAheadStRow[i] := number of nonzero rows before block i
      *   - lookAhead_lptr[i] := point to the start of block i in L's index[] 
      *   - (ditto Remain_Info[i])
      */
     for (int i = 0; i < nlb; ++i) {
	 ib = lsub[lptr];            /* Block number of L(i,k). */
	 temp_nbrow = lsub[lptr+1];  /* Number of full rows. */
        
	 int look_up_flag = 1; /* assume ib is outside look-up window */
	 for (int j = k0+1; j < SUPERLU_MIN (k0 + num_look_aheads+2, nsupers );
	      ++j) {
		 if ( ib == perm_c_supno[j] ) {
		     look_up_flag = 0; /* flag ib within look-up window */
                     break;            /* Sherry -- can exit the loop?? */
                 }
	 }
	 
	 if ( look_up_flag == 0 ) { /* ib is within look-up window */
	     if (lookAheadBlk==0) {
		 lookAheadFullRow[lookAheadBlk] = temp_nbrow;
	     } else {
		 lookAheadFullRow[lookAheadBlk] = 
		     temp_nbrow + lookAheadFullRow[lookAheadBlk-1];   
	     }
	     lookAheadStRow[lookAheadBlk] = cum_nrow;
	     lookAhead_lptr[lookAheadBlk] = lptr;
	     lookAhead_ib[lookAheadBlk] = ib; 
	     lookAheadBlk++;
	 } else { /* ib is not in look-up window */
	     if ( RemainBlk==0 ) {
		 Remain_info[RemainBlk].FullRow = temp_nbrow;
	     } else {
		 Remain_info[RemainBlk].FullRow = 
		     temp_nbrow + Remain_info[RemainBlk-1].FullRow;   
	     }
             RemainStRow[RemainBlk] = cum_nrow;
             // Remain_lptr[RemainBlk] = lptr;
	     Remain_info[RemainBlk].lptr = lptr;
	     // Remain_ib[RemainBlk] = ib; 
	     Remain_info[RemainBlk].ib = ib; 
	     RemainBlk++;
	 }
	 
         cum_nrow += temp_nbrow;
	 
	 lptr += LB_DESCRIPTOR;  /* Skip descriptor. */
	 lptr += temp_nbrow;     /* Move to next block */
	 luptr += temp_nbrow;
     }  /* for i ... set up pointers for all blocks in L(:,k) */

     lptr = lptr0;
     luptr = luptr0;

     /* leading dimension of L look-ahead buffer, same as Lnbrow */
     //int LDlookAhead_LBuff = lookAheadBlk==0 ? 0 :lookAheadFullRow[lookAheadBlk-1];
     Lnbrow = lookAheadBlk==0 ? 0 : lookAheadFullRow[lookAheadBlk-1];
     /* leading dimension of L remaining buffer, same as Rnbrow */
     //int LDRemain_LBuff = RemainBlk==0 ? 0 : Remain_info[RemainBlk-1].FullRow;
     Rnbrow = RemainBlk==0 ? 0 : Remain_info[RemainBlk-1].FullRow;
     /* assert( cum_nrow == (LDlookAhead_LBuff + LDRemain_LBuff) );*/
     /* Piyush fix */
     //int LDlookAhead_LBuff = lookAheadBlk==0? 0 : lookAheadFullRow[lookAheadBlk-1];

     nbrow = Lnbrow + Rnbrow; /* total number of rows in L */
     LookAheadRowSepMOP += 2*knsupc*(nbrow);

     /***********************************************
      * Gather U blocks (AFTER LOOK-AHEAD WINDOW)   *
      ***********************************************/
     tt_start = SuperLU_timer_();

     if ( nbrow > 0 ) { /* L(:,k) is not empty */
	 /*
	  * Counting U blocks
	  */
	 ldu = 0; /* Calculate ldu for U(k,:) after look-ahead window. */
	 ncols = 0; /* Total number of nonzero columns in U(k,:) */
	 int temp_ncols = 0;

	 /* jj0 contains the look-ahead window that was updated in 
	    dlook_ahead_update.c. Now the search can continue from that point,
	    not to start from block 0. */
#if 0
	 iukp = iukp0; /* point to the first block in index[] */
	 rukp = rukp0; /* point to the start of nzval[] */
#else
	 /* Save pointers at location right after look-ahead window
	    for later restart. */
	 iukp0 = iukp;
	 rukp0 = rukp;
#endif

	 /* if ( iam==0 ) printf("--- k0 %d, k %d, jj0 %d, nub %d\n", k0, k, jj0, nub);*/
	     
         /* 
	  * Loop through all blocks in U(k,:) to set up pointers to the start
          * of each block in the data arrays, store them in Ublock_info[j]
          * for block U(k,j).
  	  */
	 for (j = jj0; j < nub; ++j) { /* jj0 starts after look-ahead window. */
	     temp_ncols = 0;
#if 0
	     /* Sherry - can remove following call, since perm_u == Identity  */
	     arrive_at_ublock(
			      j, &iukp, &rukp, &jb, &ljb, &nsupc,
			      iukp0, rukp0, usub, perm_u, xsup, grid
			      );
#else
	     jb = usub[iukp];
	     /* ljb = LBj (jb, grid);   Local block number of U(k,j). */
	     nsupc = SuperSize(jb);
	     iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */
#endif
	     Ublock_info[j].iukp = iukp;
	     Ublock_info[j].rukp = rukp;
	     Ublock_info[j].jb = jb;

	     /* if ( iam==0 )
		 printf("j %d: Ublock_info[j].iukp %d, Ublock_info[j].rukp %d,"
			"Ublock_info[j].jb %d, nsupc %d\n", 
			j, Ublock_info[j].iukp, Ublock_info[j].rukp,
			Ublock_info[j].jb, nsupc); */

	     /* Prepare to call GEMM. */
	     jj = iukp;
	     for (; jj < iukp+nsupc; ++jj) {
		 segsize = klst - usub[jj];
		 if ( segsize ) {
                    ++temp_ncols;
                    if ( segsize > ldu ) ldu = segsize;
		 }
	     }

	     Ublock_info[j].full_u_cols = temp_ncols;
	     ncols += temp_ncols;
#if 1	     
	     /* Jump number of nonzeros in block U(k,jj);
		Move to block U(k,j+1) in nzval[] array.  */
	     rukp += usub[iukp - 1];
	     iukp += nsupc;
#endif
         } /* end for j ... compute ldu & ncols */

	 /* Now doing prefix sum on full_u_cols.
	  * After this, full_u_cols is the number of nonzero columns
          * from block 0 to block j.
          */
	 for ( j = jj0+1; j < nub; ++j) {
	     Ublock_info[j].full_u_cols += Ublock_info[j-1].full_u_cols;
	 }
            
	 /* Padding zeros to make {m,n,k} multiple of vector length. */
	 jj = 8; //n;
	 if (gemm_padding > 0 && Rnbrow > jj && ncols > jj && ldu > jj) {
	     gemm_m_pad = Rnbrow + (Rnbrow % GEMM_PADLEN);
	     gemm_n_pad = ncols + (ncols % GEMM_PADLEN);
	     //gemm_n_pad = ncols;
	     //gemm_k_pad = ldu + (ldu % GEMM_PADLEN);
	     gemm_k_pad = ldu;
	     
	     for (i = Rnbrow; i < gemm_m_pad; ++i)  // padding A matrix
		 for (j = 0; j < gemm_k_pad; ++j)
		     Remain_L_buff[i + j*gemm_m_pad] = zero;
	     for (i = 0; i < Rnbrow; ++i)         
		 for (j = ldu; j < gemm_k_pad; ++j)
		     Remain_L_buff[i + j*gemm_m_pad] = zero;
	     for (i = ldu; i < gemm_k_pad; ++i)     // padding B matrix
		 for (j = 0; j < gemm_n_pad; ++j)
		     bigU[i + j*gemm_k_pad] = zero;
	     for (i = 0; i < ldu; ++i)
		 for (j = ncols; j < gemm_n_pad; ++j)
		     bigU[i + j*gemm_k_pad] = zero;
	 } else {
	     gemm_m_pad = Rnbrow;
	     gemm_n_pad = ncols;
	     gemm_k_pad = ldu;
	 }
     
	 tempu = bigU; /* buffer the entire row block U(k,:) */

         /* Gather U(k,:) into buffer bigU[] to prepare for GEMM */
#ifdef _OPENMP
#pragma omp parallel for firstprivate(iukp, rukp) \
    private(j,tempu, jb, nsupc,ljb,segsize, lead_zero, jj, i) \
    default (shared) schedule(SCHEDULE_STRATEGY)
#endif
        for (j = jj0; j < nub; ++j) { /* jj0 starts after look-ahead window. */

            if (j==jj0) tempu = bigU;
            //else tempu = bigU + ldu * Ublock_info[j-1].full_u_cols;
            else tempu = bigU + gemm_k_pad * Ublock_info[j-1].full_u_cols;

            /* == processing each of the remaining columns in parallel == */
#if 0
	    /* Sherry - can remove following call, since perm_u == Identity  */
            arrive_at_ublock(j, &iukp, &rukp, &jb, &ljb, &nsupc,
			     iukp0, rukp0, usub,perm_u, xsup, grid);
#else
	    iukp = Ublock_info[j].iukp;
	    rukp = Ublock_info[j].rukp;
	    jb = Ublock_info[j].jb;
	    nsupc = SuperSize (jb );
#endif
            /* Copy from U(k,j) to tempu[], padding zeros.  */            
            for (jj = iukp; jj < iukp+nsupc; ++jj) {
                segsize = klst - usub[jj];
                if ( segsize ) {
                    lead_zero = ldu - segsize;
                    for (i = 0; i < lead_zero; ++i) tempu[i] = zero;
		    //tempu += lead_zero;
#if (_OPENMP>=201307)
#pragma omp simd
#endif
		    for (i=0; i0), end gather U blocks */

    GatherUTimer += SuperLU_timer_() - tt_start;
    GatherMOP += 2*ldu*ncols;
    int jj_cpu = nub;       /* limit between CPU and GPU */
    int thread_id;
    /*tempv = bigV;*/


    /**********************
     * Gather L blocks    *
     **********************/
     tt_start = SuperLU_timer_();

     /* Loop through the look-ahead blocks to copy Lval into the buffer */
#ifdef _OPENMP
#pragma omp parallel for private(j,jj,tempu,tempv) default (shared)
#endif
     for (i = 0; i < lookAheadBlk; ++i) {
	 int StRowDest, temp_nbrow;
	 if ( i==0 ) {
	     StRowDest = 0;
	     temp_nbrow = lookAheadFullRow[0];
	 } else {
	     StRowDest   = lookAheadFullRow[i-1];
	     temp_nbrow  = lookAheadFullRow[i]-lookAheadFullRow[i-1];
	 }
	 
	 int StRowSource = lookAheadStRow[i];
	 
	 /* Now copying one block into L lookahead buffer */
	 /* #pragma omp parallel for (gives slow down) */
	 // for (int j = 0; j < knsupc; ++j) { 
	 for (j = knsupc-ldu; j < knsupc; ++j) { /* skip leading columns
						    corresponding to zero U rows */
#if 1
	     /* Better let compiler generate memcpy or vectorized code. */
	     //tempu = &lookAhead_L_buff[StRowDest + j*LDlookAhead_LBuff];
	     //tempu = &lookAhead_L_buff[StRowDest + j * Lnbrow];
	     tempu = &lookAhead_L_buff[StRowDest + (j - (knsupc-ldu)) * Lnbrow];
	     tempv = &lusup[luptr+j*nsupr + StRowSource];
#if (_OPENMP>=201307)
#pragma omp simd
#endif
	     for (jj = 0; jj < temp_nbrow; ++jj) tempu[jj] = tempv[jj];
#else
	     //memcpy(&lookAhead_L_buff[StRowDest + j*LDlookAhead_LBuff],
	     memcpy(&lookAhead_L_buff[StRowDest + (j - (knsupc-ldu)) * Lnbrow],
		    &lusup[luptr+j*nsupr + StRowSource],
		    temp_nbrow * sizeof(doublecomplex) );
#endif
	 } /* end for j ... */
     } /* parallel for i ... gather Lval blocks from lookahead window */

     /* Loop through the remaining blocks to copy Lval into the buffer */
#ifdef _OPENMP
#pragma omp parallel for private(i,j,jj,tempu,tempv) default (shared) \
    schedule(SCHEDULE_STRATEGY)
#endif
     for (int i = 0; i < RemainBlk; ++i) {
         int StRowDest, temp_nbrow;
         if ( i==0 )  {
	     StRowDest  = 0;
	     temp_nbrow = Remain_info[0].FullRow;
	 } else  {
	     StRowDest   = Remain_info[i-1].FullRow;
	     temp_nbrow  = Remain_info[i].FullRow - Remain_info[i-1].FullRow;
	 }

	 int StRowSource = RemainStRow[i];

	 /* Now copying a block into L remaining buffer */
	 // #pragma omp parallel for (gives slow down)
	 // for (int j = 0; j < knsupc; ++j) {
	 for (int j = knsupc-ldu; j < knsupc; ++j) {
	     // printf("StRowDest %d Rnbrow %d StRowSource %d \n", StRowDest,Rnbrow ,StRowSource);
#if 1
	     /* Better let compiler generate memcpy or vectorized code. */
	     //tempu = &Remain_L_buff[StRowDest + j*LDRemain_LBuff];
	     //tempu = &Remain_L_buff[StRowDest + (j - (knsupc-ldu)) * Rnbrow];
	     tempu = &Remain_L_buff[StRowDest + (j - (knsupc-ldu)) * gemm_m_pad];
	     tempv = &lusup[luptr + j*nsupr + StRowSource];
#if (_OPENMP>=201307)
#pragma omp simd
#endif
	     for (jj = 0; jj < temp_nbrow; ++jj) tempu[jj] = tempv[jj];
#else
	     //memcpy(&Remain_L_buff[StRowDest + j*LDRemain_LBuff],
	     memcpy(&Remain_L_buff[StRowDest + (j - (knsupc-ldu)) * gemm_m_pad],
		    &lusup[luptr+j*nsupr + StRowSource],
                    temp_nbrow * sizeof(doublecomplex) );
#endif
	 } /* end for j ... */
     } /* parallel for i ... copy Lval into the remaining buffer */

     tt_end = SuperLU_timer_();
     GatherLTimer += tt_end - tt_start;


     /*************************************************************************
      * Perform GEMM (look-ahead L part, and remain L part) followed by Scatter
      *************************************************************************/
     tempu = bigU;  /* setting to the start of padded U(k,:) */
    
     if ( Lnbrow>0 && ldu>0 && ncols>0 ) { /* Both L(:,k) and U(k,:) nonempty */
	 /***************************************************************
	  * Updating blocks in look-ahead window of the LU(look-ahead-rows,:)
	  ***************************************************************/

	 /* Count flops for total GEMM calls */
	 ncols = Ublock_info[nub-1].full_u_cols;
	 flops_t flps = 8.0 * (flops_t)Lnbrow * ldu * ncols;
	 LookAheadScatterMOP += 3 * Lnbrow * ncols; /* scatter-add */
	 schur_flop_counter += flps;
	 stat->ops[FACT]    += flps;
	 LookAheadGEMMFlOp  += flps;

#ifdef _OPENMP
#pragma omp parallel default (shared) private(thread_id)
	 {
	   thread_id = omp_get_thread_num();
 
	   /* Ideally, should organize the loop as:
	      for (j = 0; j < nub; ++j) {
	          for (lb = 0; lb < lookAheadBlk; ++lb) {
	               L(lb,k) X U(k,j) -> tempv[]
		  }
	      }
	      But now, we use collapsed loop to achieve more parallelism.
	      Total number of block updates is:
	      (# of lookAheadBlk in L(:,k)) X (# of blocks in U(k,:))
	   */

	   int i = sizeof(int);
	   int* indirect_thread    = indirect + (ldt + CACHELINE/i) * thread_id;
	   int* indirect2_thread   = indirect2 + (ldt + CACHELINE/i) * thread_id;

#pragma omp for \
    private (nsupc,ljb,lptr,ib,temp_nbrow,cum_nrow)	\
    schedule(dynamic)
#else /* not use _OPENMP */
	   thread_id = 0;
	   int* indirect_thread    = indirect;
	   int* indirect2_thread   = indirect2;
#endif
	   /* Each thread is assigned one loop index ij, responsible for 
	      block update L(lb,k) * U(k,j) -> tempv[]. */
	   for (int ij = 0; ij < lookAheadBlk*(nub-jj0); ++ij) {
	       /* jj0 starts after look-ahead window. */
            int j   = ij/lookAheadBlk + jj0;
            int lb  = ij%lookAheadBlk;

            /* Getting U block U(k,j) information */
            /* unsigned long long ut_start, ut_end; */
            int_t rukp =  Ublock_info[j].rukp;
            int_t iukp =  Ublock_info[j].iukp;
            int jb   =  Ublock_info[j].jb;
            int nsupc = SuperSize(jb);
            int ljb = LBj (jb, grid);  /* destination column block */
            int st_col;
            int ncols;  /* Local variable counts only columns in the block */
            if ( j > jj0 ) { /* jj0 starts after look-ahead window. */
                ncols  = Ublock_info[j].full_u_cols-Ublock_info[j-1].full_u_cols;
                st_col = Ublock_info[j-1].full_u_cols;
            } else {
                ncols  = Ublock_info[j].full_u_cols;
                st_col = 0;   
            }

            /* Getting L block L(i,k) information */
            int_t lptr = lookAhead_lptr[lb];
            int ib   = lookAhead_ib[lb];
            int temp_nbrow = lsub[lptr+1];
            lptr += LB_DESCRIPTOR;
            int cum_nrow = (lb==0 ? 0 : lookAheadFullRow[lb-1]);

	    /* Block-by-block GEMM in look-ahead window */
#if 0
	    i = sizeof(doublecomplex);
	    doublecomplex* tempv1 = bigV + thread_id * (ldt*ldt + CACHELINE/i);
#else
	    doublecomplex* tempv1 = bigV + thread_id * (ldt*ldt);
#endif

#if ( PRNTlevel>= 1)
	    if (thread_id == 0) tt_start = SuperLU_timer_();
	    gemm_max_m = SUPERLU_MAX(gemm_max_m, temp_nbrow);
	    gemm_max_n = SUPERLU_MAX(gemm_max_n, ncols);
	    gemm_max_k = SUPERLU_MAX(gemm_max_k, ldu);
#endif

#if defined (USE_VENDOR_BLAS)            
            zgemm_("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
		   //&lookAhead_L_buff[(knsupc-ldu)*Lnbrow+cum_nrow], &Lnbrow,
		   &lookAhead_L_buff[cum_nrow], &Lnbrow,
		   &tempu[st_col*ldu], &ldu, &beta, tempv1, &temp_nbrow, 1, 1);
#else
            zgemm_("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
		   //&lookAhead_L_buff[(knsupc-ldu)*Lnbrow+cum_nrow], &Lnbrow,
		   &lookAhead_L_buff[cum_nrow], &Lnbrow,
		   &tempu[st_col*ldu], &ldu, &beta, tempv1, &temp_nbrow);
#endif

#if (PRNTlevel>=1 )
	    if (thread_id == 0) {
		tt_end = SuperLU_timer_();
		LookAheadGEMMTimer += tt_end - tt_start;
		tt_start = tt_end;
	    }
#endif
            if ( ib < jb ) {
                zscatter_u (
				 ib, jb,
				 nsupc, iukp, xsup,
				 klst, temp_nbrow,
				 lptr, temp_nbrow, lsub,
				 usub, tempv1,
				 Ufstnz_br_ptr, Unzval_br_ptr,
				 grid
			        );
            } else {
#if 0
		//#ifdef USE_VTUNE
	    __SSC_MARK(0x111);// start SDE tracing, note uses 2 underscores
	    __itt_resume(); // start VTune, again use 2 underscores
#endif
                zscatter_l (
				 ib, ljb, 
				 nsupc, iukp, xsup,
 				 klst, temp_nbrow,
				 lptr, temp_nbrow,
				 usub, lsub, tempv1,
				 indirect_thread, indirect2_thread,
				 Lrowind_bc_ptr, Lnzval_bc_ptr,
				 grid
				);
#if 0
		//#ifdef USE_VTUNE
		__itt_pause(); // stop VTune
		__SSC_MARK(0x222); // stop SDE tracing
#endif
            }

#if ( PRNTlevel>=1 )
	    if (thread_id == 0) 
		LookAheadScatterTimer += SuperLU_timer_() - tt_start;
#endif
	   } /* end omp for ij = ... */

#ifdef _OPENMP
	 } /* end omp parallel */
#endif
     } /* end if Lnbrow>0 ... look-ahead GEMM and scatter */

    /***************************************************************
     * Updating remaining rows and columns on CPU.
     ***************************************************************/
    ncols = jj_cpu==0 ? 0 : Ublock_info[jj_cpu-1].full_u_cols;

    if ( Rnbrow>0 && ldu>0 ) { /* There are still blocks remaining ... */
	double flps = 8.0 * (double)Rnbrow * ldu * ncols;
	schur_flop_counter  += flps;
	stat->ops[FACT]     += flps;

#if ( PRNTlevel>=1 )
	RemainGEMM_flops += flps;
	gemm_max_m = SUPERLU_MAX(gemm_max_m, Rnbrow);
	gemm_max_n = SUPERLU_MAX(gemm_max_n, ncols);
	gemm_max_k = SUPERLU_MAX(gemm_max_k, ldu);
	tt_start = SuperLU_timer_();
	/* printf("[%d] .. k0 %d, before large GEMM: %d-%d-%d, RemainBlk %d\n",
	   iam, k0,Rnbrow,ldu,ncols,RemainBlk);  fflush(stdout);
	assert( Rnbrow*ncols < bigv_size ); */
#endif
	/* calling aggregated large GEMM, result stored in bigV[]. */
#if defined (USE_VENDOR_BLAS)
	//zgemm_("N", "N", &Rnbrow, &ncols, &ldu, &alpha,
	zgemm_("N", "N", &gemm_m_pad, &gemm_n_pad, &gemm_k_pad, &alpha,
	       //&Remain_L_buff[(knsupc-ldu)*Rnbrow], &Rnbrow,
	       &Remain_L_buff[0], &gemm_m_pad,
	       &bigU[0], &gemm_k_pad, &beta, bigV, &gemm_m_pad, 1, 1);
#else
	//zgemm_("N", "N", &Rnbrow, &ncols, &ldu, &alpha,
	zgemm_("N", "N", &gemm_m_pad, &gemm_n_pad, &gemm_k_pad, &alpha,
	       //&Remain_L_buff[(knsupc-ldu)*Rnbrow], &Rnbrow,
	       &Remain_L_buff[0], &gemm_m_pad,
	       &bigU[0], &gemm_k_pad, &beta, bigV, &gemm_m_pad);
#endif

#if ( PRNTlevel>=1 )
	tt_end = SuperLU_timer_();
	RemainGEMMTimer += tt_end - tt_start;
#if ( PROFlevel>=1 )
	//fprintf(fgemm, "%8d%8d%8d %16.8e\n", Rnbrow, ncols, ldu,
	// (tt_end - tt_start)*1e6); // time in microsecond
	//fflush(fgemm);
	gemm_stats[gemm_count].m = Rnbrow;
	gemm_stats[gemm_count].n = ncols;
	gemm_stats[gemm_count].k = ldu;
	gemm_stats[gemm_count++].microseconds = (tt_end - tt_start) * 1e6;
#endif
	tt_start = SuperLU_timer_();
#endif

#ifdef USE_VTUNE
	__SSC_MARK(0x111);// start SDE tracing, note uses 2 underscores
	__itt_resume(); // start VTune, again use 2 underscores
#endif

	/* Scatter into destination block-by-block. */
#ifdef _OPENMP
#pragma omp parallel default(shared) private(thread_id)
	{
	    thread_id = omp_get_thread_num();
 
	    /* Ideally, should organize the loop as:
               for (j = 0; j < jj_cpu; ++j) {
	           for (lb = 0; lb < RemainBlk; ++lb) {
	               L(lb,k) X U(k,j) -> tempv[]
                   }
               }
	       But now, we use collapsed loop to achieve more parallelism.
	       Total number of block updates is:
	       (# of RemainBlk in L(:,k)) X (# of blocks in U(k,:))
	    */

	    int i = sizeof(int);
	    int* indirect_thread = indirect + (ldt + CACHELINE/i) * thread_id;
	    int* indirect2_thread = indirect2 + (ldt + CACHELINE/i) * thread_id;

#pragma omp for \
    private (j,lb,rukp,iukp,jb,nsupc,ljb,lptr,ib,temp_nbrow,cum_nrow)	\
    schedule(dynamic)
#else /* not use _OPENMP */
	    thread_id = 0;
	    int* indirect_thread = indirect;
	    int* indirect2_thread = indirect2;
#endif
	    /* Each thread is assigned one loop index ij, responsible for 
	       block update L(lb,k) * U(k,j) -> tempv[]. */
	    for (int ij = 0; ij < RemainBlk*(jj_cpu-jj0); ++ij) {
		/* jj_cpu := nub, jj0 starts after look-ahead window. */
		int j   = ij / RemainBlk + jj0; /* j-th block in U panel */
		int lb  = ij % RemainBlk;       /* lb-th block in L panel */

		/* Getting U block U(k,j) information */
		/* unsigned long long ut_start, ut_end; */
		int_t rukp =  Ublock_info[j].rukp;
		int_t iukp =  Ublock_info[j].iukp;
		int jb   =  Ublock_info[j].jb;
		int nsupc = SuperSize(jb);
		int ljb = LBj (jb, grid);
		int st_col;
		int ncols;
		if ( j>jj0 ) {
		    ncols = Ublock_info[j].full_u_cols - Ublock_info[j-1].full_u_cols;
		    st_col = Ublock_info[j-1].full_u_cols;
		} else {
		    ncols = Ublock_info[j].full_u_cols;
		    st_col = 0;   
		}

		/* Getting L block L(i,k) information */
		int_t lptr = Remain_info[lb].lptr;
		int ib   = Remain_info[lb].ib;
		int temp_nbrow = lsub[lptr+1];
		lptr += LB_DESCRIPTOR;
		int cum_nrow = (lb==0 ? 0 : Remain_info[lb-1].FullRow);
		
		/* tempv1 points to block(i,j) in bigV : LDA == Rnbrow */
		//double* tempv1 = bigV + (st_col * Rnbrow + cum_nrow); Sherry 
		doublecomplex* tempv1 = bigV + (st_col * gemm_m_pad + cum_nrow); /* Sherry */

		// printf("[%d] .. before scatter: ib %d, jb %d, temp_nbrow %d, Rnbrow %d\n", iam, ib, jb, temp_nbrow, Rnbrow); fflush(stdout);

		/* Now scattering the block */

		if ( ib < jb ) {
		    zscatter_u (
				ib, jb,
				nsupc, iukp, xsup,
				//klst, Rnbrow, /*** klst, temp_nbrow, Sherry */
				klst, gemm_m_pad, /*** klst, temp_nbrow, Sherry */
				lptr, temp_nbrow, /* row dimension of the block */
				lsub, usub, tempv1,
				Ufstnz_br_ptr, Unzval_br_ptr,
				grid
				);
		} else {
		    zscatter_l(
			       ib, ljb,
			       nsupc, iukp, xsup,
			       //klst, temp_nbrow, Sherry
			       klst, gemm_m_pad, /*** temp_nbrow, Sherry */
			       lptr, temp_nbrow, /* row dimension of the block */
			       usub, lsub, tempv1,
			       indirect_thread, indirect2_thread,
			       Lrowind_bc_ptr,Lnzval_bc_ptr,
			       grid
			       );
		}
		
	    } /* end omp for (int ij =...) */
	    
#ifdef _OPENMP
	} /* end omp parallel region */
#endif
	
#if ( PRNTlevel>=1 )
	RemainScatterTimer += SuperLU_timer_() - tt_start;
#endif

#ifdef USE_VTUNE
	__itt_pause(); // stop VTune
	__SSC_MARK(0x222); // stop SDE tracing
#endif

    } /* end if Rnbrow>0 ... update remaining block */

}  /* end if L(:,k) and U(k,:) are not empty */
SuperLU_DIST_5.3.0/SRC/pdlangs.c0000644013363400111340000001011313233431301015053 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/


/*! @file 
 * \brief Returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value
 *
 * 
 * File name:	pdlangs.c
 * History:     Modified from lapack routine DLANGE
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief
 
    Purpose   
    =======   

    PDLANGS returns the value of the one norm, or the Frobenius norm, or 
    the infinity norm, or the element of largest absolute value of a 
    real matrix A.   

    Description   
    ===========   

    PDLANGS returns the value   

       PDLANGS = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
                 (   
                 ( norm1(A),         NORM = '1', 'O' or 'o'   
                 (   
                 ( normI(A),         NORM = 'I' or 'i'   
                 (   
                 ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   

    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   

    Arguments   
    =========   

    NORM    (input) CHARACTER*1   
            Specifies the value to be returned in PDLANGS as described above.   
    A       (input) SuperMatrix*
            The M by N sparse matrix A. 
    GRID    (input) gridinfo_t*
            The 2D process mesh.
   ===================================================================== 
*/ double pdlangs(char *norm, SuperMatrix *A, gridinfo_t *grid) { /* Local variables */ NRformat_loc *Astore; int_t m_loc; double *Aval; int_t i, j, jcol; double value=0., sum; double *rwork; double tempvalue; double *temprwork; Astore = (NRformat_loc *) A->Store; m_loc = Astore->m_loc; Aval = (double *) Astore->nzval; if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) { value = 0.; } else if ( strncmp(norm, "M", 1)==0 ) { /* Find max(abs(A(i,j))). */ value = 0.; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) value = SUPERLU_MAX( value, fabs(Aval[j]) ); } MPI_Allreduce(&value, &tempvalue, 1, MPI_DOUBLE, MPI_MAX, grid->comm); value = tempvalue; } else if ( strncmp(norm, "O", 1)==0 || *(unsigned char *)norm == '1') { /* Find norm1(A). */ value = 0.; #if 0 for (j = 0; j < A->ncol; ++j) { sum = 0.; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) sum += fabs(Aval[i]); value = SUPERLU_MAX(value,sum); } #else /* XSL ==> */ if ( !(rwork = (double *) doubleCalloc_dist(A->ncol)) ) ABORT("doubleCalloc_dist fails for rwork."); for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; rwork[jcol] += fabs(Aval[j]); } } if ( !(temprwork = (double *) doubleCalloc_dist(A->ncol)) ) ABORT("doubleCalloc_dist fails for temprwork."); MPI_Allreduce(rwork, temprwork, A->ncol, MPI_DOUBLE, MPI_SUM, grid->comm); value = 0.; for (j = 0; j < A->ncol; ++j) { value = SUPERLU_MAX(value, temprwork[j]); } SUPERLU_FREE (temprwork); SUPERLU_FREE (rwork); #endif } else if ( strncmp(norm, "I", 1)==0 ) { /* Find normI(A). */ value = 0.; sum = 0.; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) sum += fabs(Aval[j]); value = SUPERLU_MAX(value, sum); } MPI_Allreduce(&value, &tempvalue, 1, MPI_DOUBLE, MPI_MAX, grid->comm); value = tempvalue; } else if ( strncmp(norm, "F", 1)==0 || strncmp(norm, "E", 1)==0 ) { /* Find normF(A). 
*/ ABORT("Not implemented."); } else { ABORT("Illegal norm specified."); } return (value); } /* pdlangs */ SuperLU_DIST_5.3.0/SRC/pdlaqgs.c0000644013363400111340000001025213233431301015062 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Equilibrates a general sparse M by N matrix * *
 * File name:	pdlaqgs.c
 * History:     Modified from LAPACK routine DLAQGE
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief
    Purpose   
    =======   

    PDLAQGS equilibrates a general sparse M by N matrix A using the row
    and column scaling factors in the vectors R and C.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.

    Arguments   
    =========   

    A       (input/output) SuperMatrix*
            On exit, the equilibrated matrix.  See EQUED for the form of 
            the equilibrated matrix. The type of A can be:
	    Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
	    
    R       (input) double*, dimension (A->nrow)
            The row scale factors for A.
	    
    C       (input) double*, dimension (A->ncol)
            The column scale factors for A.
	    
    ROWCND  (input) double
            Ratio of the smallest R(i) to the largest R(i).
	    
    COLCND  (input) double
            Ratio of the smallest C(i) to the largest C(i).
	    
    AMAX    (input) double
            Absolute value of largest matrix entry.
	    
    EQUED   (output) char*
            Specifies the form of equilibration that was done.   
            = 'N':  No equilibration   
            = 'R':  Row equilibration, i.e., A has been premultiplied by  
                    diag(R).   
            = 'C':  Column equilibration, i.e., A has been postmultiplied  
                    by diag(C).   
            = 'B':  Both row and column equilibration, i.e., A has been
                    replaced by diag(R) * A * diag(C).   

    Internal Parameters   
    ===================   

    THRESH is a threshold value used to decide if row or column scaling   
    should be done based on the ratio of the row or column scaling   
    factors.  If ROWCND < THRESH, row scaling is done, and if   
    COLCND < THRESH, column scaling is done.   

    LARGE and SMALL are threshold values used to decide if row scaling   
    should be done based on the absolute size of the largest matrix   
    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   

    ===================================================================== 
*/ void pdlaqgs(SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) { #define THRESH (0.1) /* Local variables */ NRformat_loc *Astore; double *Aval; int_t i, j, irow, jcol, m_loc; double large, small; /* Quick return if possible */ if (A->nrow <= 0 || A->ncol <= 0) { *(unsigned char *)equed = 'N'; return; } Astore = A->Store; Aval = Astore->nzval; m_loc = Astore->m_loc; /* Initialize LARGE and SMALL. */ small = dmach_dist("Safe minimum") / dmach_dist("Precision"); large = 1. / small; if (rowcnd >= THRESH && amax >= small && amax <= large) { if (colcnd >= THRESH) *(unsigned char *)equed = 'N'; else { /* Column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; Aval[j] *= c[jcol]; } ++irow; } *(unsigned char *)equed = 'C'; } } else if (colcnd >= THRESH) { /* Row scaling, no column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) Aval[j] *= r[irow]; ++irow; } *(unsigned char *)equed = 'R'; } else { /* Both row and column scaling */ irow = Astore->fst_row; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { jcol = Astore->colind[j]; Aval[j] = Aval[j] * r[irow] * c[jcol]; } ++irow; } *(unsigned char *)equed = 'B'; } return; } /* pdlaqgs */ SuperLU_DIST_5.3.0/SRC/static_schedule.c0000644013363400111340000007771313233431301016611 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs static scheduling for the look-ahead factorization algorithm. * *
 * -- Distributed SuperLU routine (version 4.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * August 15, 2014
 *
 * Modified:
 *
 * Reference:
 * 
 * 
*/ #include "superlu_ddefs.h" #ifdef ISORT extern void isort (int_t N, int_t * ARRAY1, int_t * ARRAY2); extern void isort1 (int_t N, int_t * ARRAY); #else int superlu_sort_perm (const void *arg1, const void *arg2) { const int_t *val1 = (const int_t *) arg1; const int_t *val2 = (const int_t *) arg2; return (*val2 < *val1); } #endif int static_schedule(superlu_dist_options_t * options, int m, int n, LUstruct_t * LUstruct, gridinfo_t * grid, SuperLUStat_t * stat, int_t *perm_c_supno, int_t *iperm_c_supno, int *info) { /* * Arguments * * perm_c_supno (output) * perm_c_superno[k] = j means at the k-th step of elimination, the j-th * panel is chosen. * */ int_t *xsup; int_t i, ib, jb, lb, nlb, il, iu; int_t Pc, Pr; int iam, krow, yourcol, mycol, myrow; int j, k, nsupers; /* k - current panel to work on */ int_t *index; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; int ncb, nrb, p, pr, pc, nblocks; int_t *etree_supno_l, *etree_supno, *blocks, *blockr, *Ublock, *Urows, *Lblock, *Lrows, *sf_block, *sf_block_l, *nnodes_l, *nnodes_u, *edag_supno_l, *recvbuf, **edag_supno; float edag_supno_l_bytes; int nnodes, *sendcnts, *sdispls, *recvcnts, *rdispls, *srows, *rrows; etree_node *head, *tail, *ptr; int *num_child; int iword = sizeof (int_t); /* Test the input parameters. */ *info = 0; if (m < 0) *info = -2; else if (n < 0) *info = -3; if (*info) { pxerr_dist ("static_schedule", grid, -*info); return (-1); } /* Quick return if possible. */ if (m == 0 || n == 0) return 0; /* * Initialization. 
*/ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW (iam, grid); mycol = MYCOL (iam, grid); nsupers = Glu_persist->supno[n - 1] + 1; xsup = Glu_persist->xsup; nblocks = 0; ncb = nsupers / Pc; nrb = nsupers / Pr; #if ( DEBUGlevel >= 1 ) print_memorylog(stat, "before static schedule"); #endif /* ================================================== * * static scheduling of j-th step of LU-factorization * * ================================================== */ if (options->lookahead_etree == YES && /* use e-tree of symmetrized matrix and */ (options->ParSymbFact == NO || /* 1) symmetric fact with serial symbolic, or */ (options->SymPattern == YES && /* 2) symmetric pattern, and */ options->RowPerm == NOROWPERM))) { /* no rowperm to destroy symmetry */ /* if symmetric pattern or using e-tree of |A^T|+|A|, then we can use a simple tree structure for static schduling */ if (options->ParSymbFact == NO) { /* Use the etree computed from serial symb. fact., and turn it into supernodal tree. */ int_t *etree = LUstruct->etree; #if ( PRNTlevel>=1 ) if (grid->iam == 0) printf (" === using column e-tree ===\n"); #endif /* look for the first off-diagonal blocks */ etree_supno = SUPERLU_MALLOC (nsupers * sizeof (int_t)); log_memory(nsupers * iword, stat); for (i = 0; i < nsupers; i++) etree_supno[i] = nsupers; for (j = 0, lb = 0; lb < nsupers; lb++) { for (k = 0; k < SuperSize (lb); k++) { jb = Glu_persist->supno[etree[j + k]]; if (jb != lb) etree_supno[lb] = SUPERLU_MIN (etree_supno[lb], jb); } j += SuperSize (lb); } } else { /* ParSymbFACT==YES and SymPattern==YES and RowPerm == NOROWPERM */ /* Compute an "etree" based on struct(L), assuming struct(U) = struct(L'). 
*/ #if ( PRNTlevel>=1 ) if (grid->iam == 0) printf (" === using supernodal e-tree ===\n"); #endif /* find the first block in each supernodal-column of local L-factor */ etree_supno_l = SUPERLU_MALLOC (nsupers * sizeof (int_t)); log_memory(nsupers * iword, stat); for (i = 0; i < nsupers; i++) etree_supno_l[i] = nsupers; for (lb = 0; lb < ncb; lb++) { jb = lb * grid->npcol + mycol; index = Llu->Lrowind_bc_ptr[lb]; if (index) { /* Not an empty column */ i = index[0]; k = BC_HEADER; krow = PROW (jb, grid); if (krow == myrow) { /* skip the diagonal block */ k += LB_DESCRIPTOR + index[k + 1]; i--; } if (i > 0) { etree_supno_l[jb] = index[k]; k += LB_DESCRIPTOR + index[k + 1]; i--; } for (j = 0; j < i; j++) { etree_supno_l[jb] = SUPERLU_MIN (etree_supno_l[jb], index[k]); k += LB_DESCRIPTOR + index[k + 1]; } } } if (mycol < nsupers % grid->npcol) { jb = ncb * grid->npcol + mycol; index = Llu->Lrowind_bc_ptr[ncb]; if (index) { /* Not an empty column */ i = index[0]; k = BC_HEADER; krow = PROW (jb, grid); if (krow == myrow) { /* skip the diagonal block */ k += LB_DESCRIPTOR + index[k + 1]; i--; } if (i > 0) { etree_supno_l[jb] = index[k]; k += LB_DESCRIPTOR + index[k + 1]; i--; } for (j = 0; j < i; j++) { etree_supno_l[jb] = SUPERLU_MIN (etree_supno_l[jb], index[k]); k += LB_DESCRIPTOR + index[k + 1]; } } } /* form global e-tree */ etree_supno = SUPERLU_MALLOC (nsupers * sizeof (int_t)); MPI_Allreduce (etree_supno_l, etree_supno, nsupers, mpi_int_t, MPI_MIN, grid->comm); SUPERLU_FREE (etree_supno_l); } /* initialize number of children for each node */ num_child = SUPERLU_MALLOC (nsupers * sizeof (int_t)); for (i = 0; i < nsupers; i++) num_child[i] = 0; for (i = 0; i < nsupers; i++) if (etree_supno[i] != nsupers) num_child[etree_supno[i]]++; /* push initial leaves to the fifo queue */ nnodes = 0; for (i = 0; i < nsupers; i++) { if (num_child[i] == 0) { ptr = SUPERLU_MALLOC (sizeof (etree_node)); ptr->id = i; ptr->next = NULL; /*printf( " == push leaf %d (%d) ==\n",i,nnodes ); 
*/ nnodes++; if (nnodes == 1) { head = ptr; tail = ptr; } else { tail->next = ptr; tail = ptr; } } } /* process fifo queue, and compute the ordering */ i = 0; while (nnodes > 0) { ptr = head; j = ptr->id; head = ptr->next; perm_c_supno[i] = j; SUPERLU_FREE (ptr); i++; nnodes--; if (etree_supno[j] != nsupers) { num_child[etree_supno[j]]--; if (num_child[etree_supno[j]] == 0) { nnodes++; ptr = SUPERLU_MALLOC (sizeof (etree_node)); ptr->id = etree_supno[j]; ptr->next = NULL; /*printf( "=== push %d ===\n",ptr->id ); */ if (nnodes == 1) { head = ptr; tail = ptr; } else { tail->next = ptr; tail = ptr; } } } /*printf( "\n" ); */ } SUPERLU_FREE (num_child); SUPERLU_FREE (etree_supno); log_memory(-2 * nsupers * iword, stat); } else { /* Unsymmetric pattern */ /* Need to process both L- and U-factors, use the symmetrically pruned graph of L & U instead of tree (very naive implementation) */ int nrbp1 = nrb + 1; float Ublock_bytes, Urows_bytes, Lblock_bytes, Lrows_bytes; /* allocate some workspace */ if (! 
(sendcnts = SUPERLU_MALLOC ((4 + 2 * nrbp1) * Pr * Pc * sizeof (int)))) ABORT ("Malloc fails for sendcnts[]."); log_memory((4 + 2 * nrbp1) * Pr * Pc * sizeof (int), stat); sdispls = &sendcnts[Pr * Pc]; recvcnts = &sdispls[Pr * Pc]; rdispls = &recvcnts[Pr * Pc]; srows = &rdispls[Pr * Pc]; rrows = &srows[Pr * Pc * nrbp1]; myrow = MYROW (iam, grid); #if ( PRNTlevel>=1 ) if (grid->iam == 0) printf (" === using DAG ===\n"); #endif /* send supno block of local U-factor to a processor * * who owns the corresponding block of L-factor */ /* srows : # of block to send to a processor from each supno row */ /* sendcnts: total # of blocks to send to a processor */ for (p = 0; p < Pr * Pc * nrbp1; p++) srows[p] = 0; for (p = 0; p < Pr * Pc; p++) sendcnts[p] = 0; /* sending blocks of U-factors corresponding to L-factors */ /* count the number of blocks to send */ for (lb = 0; lb < nrb; ++lb) { jb = lb * Pr + myrow; pc = jb % Pc; index = Llu->Ufstnz_br_ptr[lb]; if (index) { /* Not an empty row */ k = BR_HEADER; nblocks += index[0]; for (j = 0; j < index[0]; ++j) { ib = index[k]; pr = ib % Pr; p = pr * Pc + pc; sendcnts[p]++; srows[p * nrbp1 + lb]++; k += UB_DESCRIPTOR + SuperSize (index[k]); } } } if (myrow < nsupers % grid->nprow) { jb = nrb * Pr + myrow; pc = jb % Pc; index = Llu->Ufstnz_br_ptr[nrb]; if (index) { /* Not an empty row */ k = BR_HEADER; nblocks += index[0]; for (j = 0; j < index[0]; ++j) { ib = index[k]; pr = ib % Pr; p = pr * Pc + pc; sendcnts[p]++; srows[p * nrbp1 + nrb]++; k += UB_DESCRIPTOR + SuperSize (index[k]); } } } /* insert blocks to send */ sdispls[0] = 0; for (p = 1; p < Pr * Pc; p++) sdispls[p] = sdispls[p - 1] + sendcnts[p - 1]; if (!(blocks = intMalloc_dist (nblocks))) ABORT ("Malloc fails for blocks[]."); log_memory( nblocks * iword, stat ); for (lb = 0; lb < nrb; ++lb) { jb = lb * Pr + myrow; pc = jb % Pc; index = Llu->Ufstnz_br_ptr[lb]; if (index) { /* Not an empty row */ k = BR_HEADER; for (j = 0; j < index[0]; ++j) { ib = index[k]; pr = ib % Pr; 
p = pr * Pc + pc; blocks[sdispls[p]] = ib; sdispls[p]++; k += UB_DESCRIPTOR + SuperSize (index[k]); } } } if (myrow < nsupers % grid->nprow) { jb = nrb * Pr + myrow; pc = jb % Pc; index = Llu->Ufstnz_br_ptr[nrb]; if (index) { /* Not an empty row */ k = BR_HEADER; for (j = 0; j < index[0]; ++j) { ib = index[k]; pr = ib % Pr; p = pr * Pc + pc; blocks[sdispls[p]] = ib; sdispls[p]++; k += UB_DESCRIPTOR + SuperSize (index[k]); } } } /* communication */ MPI_Alltoall (sendcnts, 1, MPI_INT, recvcnts, 1, MPI_INT, grid->comm); MPI_Alltoall (srows, nrbp1, MPI_INT, rrows, nrbp1, MPI_INT, grid->comm); log_memory( -(nblocks * iword), stat ); /* blocks[] to be freed soon */ nblocks = recvcnts[0]; rdispls[0] = sdispls[0] = 0; for (p = 1; p < Pr * Pc; p++) { rdispls[p] = rdispls[p - 1] + recvcnts[p - 1]; sdispls[p] = sdispls[p - 1] + sendcnts[p - 1]; nblocks += recvcnts[p]; } if (!(blockr = intMalloc_dist (nblocks))) ABORT ("Malloc fails for blockr[]."); log_memory( nblocks * iword, stat ); MPI_Alltoallv (blocks, sendcnts, sdispls, mpi_int_t, blockr, recvcnts, rdispls, mpi_int_t, grid->comm); SUPERLU_FREE (blocks); /* memory logged before */ /* store the received U-blocks by rows */ nlb = nsupers / Pc; if (!(Ublock = intMalloc_dist (nblocks))) ABORT ("Malloc fails for Ublock[]."); if (!(Urows = intMalloc_dist (1 + nlb))) ABORT ("Malloc fails for Urows[]."); Ublock_bytes = nblocks * iword; Urows_bytes = (1 + nlb) * iword; log_memory( Ublock_bytes + Urows_bytes, stat ); k = 0; for (jb = 0; jb < nlb; jb++) { j = jb * Pc + mycol; pr = j % Pr; lb = j / Pr; Urows[jb] = 0; for (pc = 0; pc < Pc; pc++) { p = pr * Pc + pc; /* the processor owning this block of U-factor */ for (i = rdispls[p]; i < rdispls[p] + rrows[p * nrbp1 + lb]; i++) { Ublock[k] = blockr[i]; k++; Urows[jb]++; } rdispls[p] += rrows[p * nrbp1 + lb]; } /* sort by the column indices to make things easier for later on */ #ifdef ISORT isort1 (Urows[jb], &(Ublock[k - Urows[jb]])); #else qsort (&(Ublock[k - Urows[jb]]), (size_t) 
(Urows[jb]), sizeof (int_t), &superlu_sort_perm); #endif } if (mycol < nsupers % grid->npcol) { j = nlb * Pc + mycol; pr = j % Pr; lb = j / Pr; Urows[nlb] = 0; for (pc = 0; pc < Pc; pc++) { p = pr * Pc + pc; for (i = rdispls[p]; i < rdispls[p] + rrows[p * nrbp1 + lb]; i++) { Ublock[k] = blockr[i]; k++; Urows[nlb]++; } rdispls[p] += rrows[p * nrb + lb]; } #ifdef ISORT isort1 (Urows[nlb], &(Ublock[k - Urows[nlb]])); #else qsort (&(Ublock[k - Urows[nlb]]), (size_t) (Urows[nlb]), sizeof (int_t), &superlu_sort_perm); #endif } SUPERLU_FREE (blockr); log_memory( -nblocks * iword, stat ); /* sort the block in L-factor */ nblocks = 0; for (lb = 0; lb < ncb; lb++) { jb = lb * Pc + mycol; index = Llu->Lrowind_bc_ptr[lb]; if (index) { /* Not an empty column */ nblocks += index[0]; } } if (mycol < nsupers % grid->npcol) { jb = ncb * Pc + mycol; index = Llu->Lrowind_bc_ptr[ncb]; if (index) { /* Not an empty column */ nblocks += index[0]; } } if (!(Lblock = intMalloc_dist (nblocks))) ABORT ("Malloc fails for Lblock[]."); if (!(Lrows = intMalloc_dist (1 + ncb))) ABORT ("Malloc fails for Lrows[]."); Lblock_bytes = nblocks * iword; Lrows_bytes = (1 + ncb) * iword; log_memory(Lblock_bytes + Lrows_bytes, stat); for (lb = 0; lb <= ncb; lb++) Lrows[lb] = 0; nblocks = 0; for (lb = 0; lb < ncb; lb++) { Lrows[lb] = 0; jb = lb * Pc + mycol; index = Llu->Lrowind_bc_ptr[lb]; if (index) { /* Not an empty column */ i = index[0]; k = BC_HEADER; krow = PROW (jb, grid); if (krow == myrow) /* skip the diagonal block */ { k += LB_DESCRIPTOR + index[k + 1]; i--; } for (j = 0; j < i; j++) { Lblock[nblocks] = index[k]; Lrows[lb]++; nblocks++; k += LB_DESCRIPTOR + index[k + 1]; } } #ifdef ISORT isort1 (Lrows[lb], &(Lblock[nblocks - Lrows[lb]])); #else qsort (&(Lblock[nblocks - Lrows[lb]]), (size_t) (Lrows[lb]), sizeof (int_t), &superlu_sort_perm); #endif } if (mycol < nsupers % grid->npcol) { Lrows[ncb] = 0; jb = ncb * Pc + mycol; index = Llu->Lrowind_bc_ptr[ncb]; if (index) { /* Not an empty column */ 
i = index[0]; k = BC_HEADER; krow = PROW (jb, grid); if (krow == myrow) { /* skip the diagonal block */ k += LB_DESCRIPTOR + index[k + 1]; i--; } for (j = 0; j < i; j++) { Lblock[nblocks] = index[k]; Lrows[ncb]++; nblocks++; k += LB_DESCRIPTOR + index[k + 1]; } #ifdef ISORT isort1 (Lrows[ncb], &(Lblock[nblocks - Lrows[ncb]])); #else qsort (&(Lblock[nblocks - Lrows[ncb]]), (size_t) (Lrows[ncb]), sizeof (int_t), &superlu_sort_perm); #endif } } /* look for the first local symmetric nonzero block match */ if (!(sf_block = intMalloc_dist (nsupers))) ABORT ("Malloc fails for sf_block[]."); if (!(sf_block_l = intMalloc_dist (nsupers))) ABORT ("Malloc fails for sf_block_l[]."); log_memory( 2 * nsupers * iword, stat ); for (lb = 0; lb < nsupers; lb++) sf_block_l[lb] = nsupers; i = 0; j = 0; for (jb = 0; jb < nlb; jb++) { if (Urows[jb] > 0) { ib = i + Urows[jb]; lb = jb * Pc + mycol; for (k = 0; k < Lrows[jb]; k++) { while (Ublock[i] < Lblock[j] && i + 1 < ib) i++; if (Ublock[i] == Lblock[j]) { sf_block_l[lb] = Lblock[j]; j += (Lrows[jb] - k); k = Lrows[jb]; } else { j++; } } i = ib; } else { j += Lrows[jb]; } } if (mycol < nsupers % grid->npcol) { if (Urows[nlb] > 0) { ib = i + Urows[nlb]; lb = nlb * Pc + mycol; for (k = 0; k < Lrows[nlb]; k++) { while (Ublock[i] < Lblock[j] && i + 1 < ib) i++; if (Ublock[i] == Lblock[j]) { sf_block_l[lb] = Lblock[j]; j += (Lrows[nlb] - k); k = Lrows[nlb]; } else { j++; } } i = ib; } else { j += Lrows[nlb]; } } /* compute the first global symmetric matchs */ MPI_Allreduce (sf_block_l, sf_block, nsupers, mpi_int_t, MPI_MIN, grid->comm); SUPERLU_FREE (sf_block_l); log_memory( -nsupers * iword, stat ); /* count number of nodes in DAG (i.e., the number of blocks on and above the first match) */ if (!(nnodes_l = intMalloc_dist (nsupers))) ABORT ("Malloc fails for nnodes_l[]."); if (!(nnodes_u = intMalloc_dist (nsupers))) ABORT ("Malloc fails for nnodes_u[]."); log_memory( 2 * nsupers * iword, stat ); for (lb = 0; lb < nsupers; lb++) nnodes_l[lb] 
= 0; for (lb = 0; lb < nsupers; lb++) nnodes_u[lb] = 0; nblocks = 0; /* from U-factor */ for (i = 0, jb = 0; jb < nlb; jb++) { lb = jb * Pc + mycol; ib = i + Urows[jb]; while (i < ib) { if (Ublock[i] <= sf_block[lb]) { nnodes_u[lb]++; i++; nblocks++; } else { /* get out */ i = ib; } } i = ib; } if (mycol < nsupers % grid->npcol) { lb = nlb * Pc + mycol; ib = i + Urows[nlb]; while (i < ib) { if (Ublock[i] <= sf_block[lb]) { nnodes_u[lb]++; i++; nblocks++; } else { /* get out */ i = ib; } } i = ib; } /* from L-factor */ for (i = 0, jb = 0; jb < nlb; jb++) { lb = jb * Pc + mycol; ib = i + Lrows[jb]; while (i < ib) { if (Lblock[i] < sf_block[lb]) { nnodes_l[lb]++; i++; nblocks++; } else { i = ib; } } i = ib; } if (mycol < nsupers % grid->npcol) { lb = nlb * Pc + mycol; ib = i + Lrows[nlb]; while (i < ib) { if (Lblock[i] < sf_block[lb]) { nnodes_l[lb]++; i++; nblocks++; } else { i = ib; } } i = ib; } #ifdef USE_ALLGATHER /* insert local nodes in DAG */ if (!(edag_supno_l = intMalloc_dist (nsupers + nblocks))) ABORT ("Malloc fails for edag_supno_l[]."); edag_supno_l_bytes = (nsupers + nblocks) * iword; log_memory(edag_supno_l_bytes, stat); iu = il = nblocks = 0; for (lb = 0; lb < nsupers; lb++) { j = lb / Pc; pc = lb % Pc; edag_supno_l[nblocks] = nnodes_l[lb] + nnodes_u[lb]; nblocks++; if (mycol == pc) { /* from U-factor */ ib = iu + Urows[j]; for (jb = 0; jb < nnodes_u[lb]; jb++) { edag_supno_l[nblocks] = Ublock[iu]; iu++; nblocks++; } iu = ib; /* from L-factor */ ib = il + Lrows[j]; for (jb = 0; jb < nnodes_l[lb]; jb++) { edag_supno_l[nblocks] = Lblock[il]; il++; nblocks++; } il = ib; } } SUPERLU_FREE (nnodes_u); log_memory(-nsupers * iword, stat); /* form global DAG on each processor */ MPI_Allgather (&nblocks, 1, MPI_INT, recvcnts, 1, MPI_INT, grid->comm); nblocks = recvcnts[0]; rdispls[0] = 0; for (lb = 1; lb < Pc * Pr; lb++) { rdispls[lb] = nblocks; nblocks += recvcnts[lb]; } if (!(recvbuf = intMalloc_dist (nblocks))) ABORT ("Malloc fails for recvbuf[]."); 
log_memory(nblocks * iword, stat); MPI_Allgatherv (edag_supno_l, recvcnts[iam], mpi_int_t, recvbuf, recvcnts, rdispls, mpi_int_t, grid->comm); SUPERLU_FREE (edag_supno_l); log_memory(-edag_supno_l_bytes, stat); if (!(edag_supno = SUPERLU_MALLOC (nsupers * sizeof (int_t *)))) ABORT ("Malloc fails for edag_supno[]."); log_memory(nsupers * iword, stat); k = 0; for (lb = 0; lb < nsupers; lb++) nnodes_l[lb] = 0; for (p = 0; p < Pc * Pr; p++) { for (lb = 0; lb < nsupers; lb++) { nnodes_l[lb] += recvbuf[k]; k += (1 + recvbuf[k]); } } for (lb = 0; lb < nsupers; lb++) { if (nnodes_l[lb] > 0) if (!(edag_supno[lb] = intMalloc_dist (nnodes_l[lb]))) ABORT ("Malloc fails for edag_supno[lb]."); nnodes_l[lb] = 0; } k = 0; for (p = 0; p < Pc * Pr; p++) { for (lb = 0; lb < nsupers; lb++) { jb = k + recvbuf[k] + 1; k++; for (; k < jb; k++) { edag_supno[lb][nnodes_l[lb]] = recvbuf[k]; nnodes_l[lb]++; } } } SUPERLU_FREE (recvbuf); log_memory(-nblocks * iword, stat); #else /* not USE_ALLGATHER */ int nlsupers = nsupers / Pc; if (mycol < nsupers % Pc) nlsupers++; /* insert local nodes in DAG */ if (!(edag_supno_l = intMalloc_dist (nlsupers + nblocks))) ABORT ("Malloc fails for edag_supno_l[]."); edag_supno_l_bytes = (nlsupers + nblocks) * iword; log_memory(edag_supno_l_bytes, stat); iu = il = nblocks = 0; for (lb = 0; lb < nsupers; lb++) { j = lb / Pc; pc = lb % Pc; if (mycol == pc) { edag_supno_l[nblocks] = nnodes_l[lb] + nnodes_u[lb]; nblocks++; /* from U-factor */ ib = iu + Urows[j]; for (jb = 0; jb < nnodes_u[lb]; jb++) { edag_supno_l[nblocks] = Ublock[iu]; iu++; nblocks++; } iu = ib; /* from L-factor */ ib = il + Lrows[j]; for (jb = 0; jb < nnodes_l[lb]; jb++) { edag_supno_l[nblocks] = Lblock[il]; il++; nblocks++; } il = ib; } else if (nnodes_l[lb] + nnodes_u[lb] != 0) printf (" # %d: nnodes[" IFMT "]=" IFMT "+" IFMT "\n", grid->iam, lb, nnodes_l[lb], nnodes_u[lb]); } SUPERLU_FREE (nnodes_u); log_memory(-nsupers * iword, stat); /* form global DAG on each processor */ MPI_Allgather 
(&nblocks, 1, MPI_INT, recvcnts, 1, MPI_INT, grid->comm); nblocks = recvcnts[0]; rdispls[0] = 0; for (lb = 1; lb < Pc * Pr; lb++) { rdispls[lb] = nblocks; nblocks += recvcnts[lb]; } if (!(recvbuf = intMalloc_dist (nblocks))) ABORT ("Malloc fails for recvbuf[]."); log_memory(nblocks * iword, stat); MPI_Allgatherv (edag_supno_l, recvcnts[iam], mpi_int_t, recvbuf, recvcnts, rdispls, mpi_int_t, grid->comm); SUPERLU_FREE (edag_supno_l); log_memory(-edag_supno_l_bytes, stat); if (!(edag_supno = SUPERLU_MALLOC (nsupers * sizeof (int_t *)))) ABORT ("Malloc fails for edag_supno[]."); log_memory(nsupers * sizeof(int_t *), stat); k = 0; for (lb = 0; lb < nsupers; lb++) nnodes_l[lb] = 0; for (p = 0; p < Pc * Pr; p++) { yourcol = MYCOL (p, grid); for (lb = 0; lb < nsupers; lb++) { j = lb / Pc; pc = lb % Pc; if (yourcol == pc) { nnodes_l[lb] += recvbuf[k]; k += (1 + recvbuf[k]); } } } for (lb = 0; lb < nsupers; lb++) { if (nnodes_l[lb] > 0) if (!(edag_supno[lb] = intMalloc_dist (nnodes_l[lb]))) ABORT ("Malloc fails for edag_supno[lb]."); nnodes_l[lb] = 0; } k = 0; for (p = 0; p < Pc * Pr; p++) { yourcol = MYCOL (p, grid); for (lb = 0; lb < nsupers; lb++) { j = lb / Pc; pc = lb % Pc; if (yourcol == pc) { jb = k + recvbuf[k] + 1; k++; for (; k < jb; k++) { edag_supno[lb][nnodes_l[lb]] = recvbuf[k]; nnodes_l[lb]++; } } } } SUPERLU_FREE (recvbuf); log_memory( -nblocks * iword , stat); #endif /* end USE_ALL_GATHER */ /* initialize the num of child for each node */ num_child = SUPERLU_MALLOC (nsupers * sizeof (int_t)); for (i = 0; i < nsupers; i++) num_child[i] = 0; for (i = 0; i < nsupers; i++) { for (jb = 0; jb < nnodes_l[i]; jb++) { num_child[edag_supno[i][jb]]++; } } /* push initial leaves to the fifo queue */ nnodes = 0; for (i = 0; i < nsupers; i++) { if (num_child[i] == 0) { ptr = SUPERLU_MALLOC (sizeof (etree_node)); ptr->id = i; ptr->next = NULL; /*printf( " == push leaf %d (%d) ==\n",i,nnodes ); */ nnodes++; if (nnodes == 1) { head = ptr; tail = ptr; } else { tail->next = 
ptr; tail = ptr; } } } /* process fifo queue, and compute the ordering */ i = 0; while (nnodes > 0) { /*printf( "=== pop %d (%d) ===\n",head->id,i ); */ ptr = head; j = ptr->id; head = ptr->next; perm_c_supno[i] = j; SUPERLU_FREE (ptr); i++; nnodes--; for (jb = 0; jb < nnodes_l[j]; jb++) { num_child[edag_supno[j][jb]]--; if (num_child[edag_supno[j][jb]] == 0) { nnodes++; ptr = SUPERLU_MALLOC (sizeof (etree_node)); ptr->id = edag_supno[j][jb]; ptr->next = NULL; /*printf( "=== push %d ===\n",ptr->id ); */ if (nnodes == 1) { head = ptr; tail = ptr; } else { tail->next = ptr; tail = ptr; } } } /*printf( "\n" ); */ } for (lb = 0; lb < nsupers; lb++) if (nnodes_l[lb] > 0) SUPERLU_FREE (edag_supno[lb]); SUPERLU_FREE (num_child); SUPERLU_FREE (edag_supno); SUPERLU_FREE (nnodes_l); SUPERLU_FREE (sf_block); SUPERLU_FREE (sendcnts); log_memory(-(4 * nsupers + (4 + 2 * nrbp1)*Pr*Pc) * iword, stat); SUPERLU_FREE (Ublock); SUPERLU_FREE (Urows); SUPERLU_FREE (Lblock); SUPERLU_FREE (Lrows); log_memory(-(Ublock_bytes + Urows_bytes + Lblock_bytes + Lrows_bytes), stat); } /* ======================== * * end of static scheduling * * ======================== */ for (lb = 0; lb < nsupers; lb++) iperm_c_supno[perm_c_supno[lb]] = lb; #if ( DEBUGlevel >= 1 ) print_memorylog(stat, "after static schedule"); check_perm_dist("perm_c_supno", nsupers, perm_c_supno); check_perm_dist("iperm_c_supno", nsupers, iperm_c_supno); #endif return 0; } /* STATIC_SCHEDULE */ SuperLU_DIST_5.3.0/SRC/cublas_utils.c0000644013363400111340000001030313233431301016115 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. 
*/ #include #include "cublas_utils.h" void DisplayHeader() { const int kb = 1024; const int mb = kb * kb; // cout << "NBody.GPU" << endl << "=========" << endl << endl; printf("CUDA version: v %d\n",CUDART_VERSION); //cout << "Thrust version: v" << THRUST_MAJOR_VERSION << "." << THRUST_MINOR_VERSION << endl << endl; int devCount; cudaGetDeviceCount(&devCount); printf( "CUDA Devices: \n \n"); for(int i = 0; i < devCount; ++i) { struct cudaDeviceProp props; cudaGetDeviceProperties(&props, i); printf("%d : %s %d %d\n",i, props.name,props.major,props.minor ); // cout << i << ": " << props.name << ": " << props.major << "." << props.minor << endl; printf(" Global memory: %ld mb \n", props.totalGlobalMem / mb); // cout << " Global memory: " << props.totalGlobalMem / mb << "mb" << endl; printf(" Shared memory: %ld kb \n", props.sharedMemPerBlock / kb ); //<< << "kb" << endl; printf(" Constant memory: %ld kb \n", props.totalConstMem / kb ); printf(" Block registers: %d \n\n", props.regsPerBlock ); // to do these later // printf(" Warp size: %d" << props.warpSize << endl; // printf(" Threads per block: %d" << props.maxThreadsPerBlock << endl; // printf(" Max block dimensions: [ %d" << props.maxThreadsDim[0] << ", " << props.maxThreadsDim[1] << ", " << props.maxThreadsDim[2] << " ]" << endl; // printf(" Max grid dimensions: [ %d" << props.maxGridSize[0] << ", " << props.maxGridSize[1] << ", " << props.maxGridSize[2] << " ]" << endl; // cout << " Shared memory: " << props.sharedMemPerBlock / kb << "kb" << endl; // cout << " Constant memory: " << props.totalConstMem / kb << "kb" << endl; // cout << " Block registers: " << props.regsPerBlock << endl << endl; // cout << " Warp size: " << props.warpSize << endl; // cout << " Threads per block: " << props.maxThreadsPerBlock << endl; // cout << " Max block dimensions: [ " << props.maxThreadsDim[0] << ", " << props.maxThreadsDim[1] << ", " << props.maxThreadsDim[2] << " ]" << endl; // cout << " Max grid dimensions: [ " << 
props.maxGridSize[0] << ", " << props.maxGridSize[1] << ", " << props.maxGridSize[2] << " ]" << endl; // cout << endl; } } const char* cublasGetErrorString(cublasStatus_t status) { switch(status) { case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS"; case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED"; case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED"; case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE"; case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH"; case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR"; case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED"; case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR"; } return "unknown error"; } inline cudaError_t checkCuda(cudaError_t result) { #if defined(DEBUG) || defined(_DEBUG) if (result != cudaSuccess) { fprintf(stderr, "CUDA Runtime Error: %s\n", cudaGetErrorString(result)); assert(result == cudaSuccess); } #endif return result; } cublasStatus_t checkCublas(cublasStatus_t result) { #if defined(DEBUG) || defined(_DEBUG) if (result != CUBLAS_STATUS_SUCCESS) { fprintf(stderr, "CUDA Blas Runtime Error: %s\n", cublasGetErrorString(result)); assert(result == CUBLAS_STATUS_SUCCESS); } #endif return result; } cublasHandle_t create_handle () { cublasHandle_t handle; checkCublas(cublasCreate(&handle)); return handle; } void destroy_handle (cublasHandle_t handle) { checkCublas(cublasDestroy(handle)); } SuperLU_DIST_5.3.0/SRC/cublas_utils.h0000644013363400111340000000164013233431301016126 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file *
 * -- Distributed SuperLU routine (version 4.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 * 
*/ #ifndef CUBLAS_UTILS_H #define CUBLAS_UTILS_H #include #include "cuda.h" #include "cuda_runtime_api.h" #include "cuda_runtime.h" extern void DisplayHeader(); extern const char* cublasGetErrorString(cublasStatus_t status); extern cudaError_t checkCuda(cudaError_t); extern cublasStatus_t checkCublas(cublasStatus_t); extern cublasHandle_t create_handle (); extern void destroy_handle (cublasHandle_t handle); #endif SuperLU_DIST_5.3.0/SRC/pdsymbfact_distdata.c0000644013363400111340000017441513233431301017454 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Redistribute the symbolic structure of L and U from the distribution * *
 * -- Parallel symbolic factorization auxiliary routine (version 2.3) --
 * -- Distributes the data from parallel symbolic factorization 
 * -- to numeric factorization
 * INRIA France -  July 1, 2004
 * Laura Grigori
 *
 * November 1, 2007
 * February 20, 2008
 * October 15, 2008
 * 
*/ /* limits.h: the largest positive integer (INT_MAX) */ #include #include "superlu_ddefs.h" #include "psymbfact.h" /*! \brief * *
 * Purpose
 * =======
 * 
 * Redistribute the symbolic structure of L and U from the distribution
 * used in the parallel symbolic factorization step to the distribution
 * used in the parallel numeric factorization step.  On exit, the L and U
 * structure for the 2D distribution used in the numeric factorization step is
 * stored in p_xlsub, p_lsub, p_xusub, p_usub.  The global supernodal 
 * information is also computed and it is stored in Glu_persist->supno
 * and Glu_persist->xsup.
 *
 * This routine allocates memory for storing the structure of L and U
 * and the supernodes information.  This represents the arrays:
 * p_xlsub, p_lsub, p_xusub, p_usub,
 * Glu_persist->supno,  Glu_persist->xsup.
 *
 * This routine also deallocates memory allocated during symbolic 
 * factorization routine.  That is, the following arrays are freed:
 * Pslu_freeable->xlsub,  Pslu_freeable->lsub, 
 * Pslu_freeable->xusub, Pslu_freeable->usub, 
 * Pslu_freeable->globToLoc, Pslu_freeable->supno_loc, 
 * Pslu_freeable->xsup_beg_loc, Pslu_freeable->xsup_end_loc.
 *
 * Arguments
 * =========
 *
 * n      (Input) int_t
 *        Order of the input matrix
 * Pslu_freeable  (Input) Pslu_freeable_t *
 *        Local L and U structure, 
 *        global to local indexing information.
 * 
 * Glu_persist (Output) Glu_persist_t *
 *        Stores on output the information on supernodes mapping.
 * 
 * p_xlsub (Output) int_t **
 *         Pointer to structure of L distributed on a 2D grid 
 *         of processors, stored by columns.
 * 
 * p_lsub  (Output) int_t **
 *         Structure of L distributed on a 2D grid of processors, 
 *         stored by columns.
 *
 * p_xusub (Output) int_t **
 *         Pointer to structure of U distributed on a 2D grid 
 *         of processors, stored by rows.
 * 
 * p_usub  (Output) int_t **
 *         Structure of U distributed on a 2D grid of processors, 
 *         stored by rows.
 * 
 * grid   (Input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Return value
 * ============
 *   < 0, number of bytes allocated on return from the dist_symbLU.
 *   > 0, number of bytes allocated in this routine when out of memory.
 *        (an approximation).
 * 
*/ static float dist_symbLU (int_t n, Pslu_freeable_t *Pslu_freeable, Glu_persist_t *Glu_persist, int_t **p_xlsub, int_t **p_lsub, int_t **p_xusub, int_t **p_usub, gridinfo_t *grid ) { int iam, nprocs, pc, pr, p, np, p_diag; int_t *nnzToSend, *nnzToRecv, *nnzToSend_l, *nnzToSend_u, *tmp_ptrToSend, *mem; int_t *nnzToRecv_l, *nnzToRecv_u; int_t *send_1, *send_2, nsend_1, nsend_2; int_t *ptrToSend, *ptrToRecv, sendL, sendU, *snd_luind, *rcv_luind; int_t nsupers, nsupers_i, nsupers_j; int *nvtcs, *intBuf1, *intBuf2, *intBuf3, *intBuf4, intNvtcs_loc; int_t maxszsn, maxNvtcsPProc; int_t *xsup_n, *supno_n, *temp, *xsup_beg_s, *xsup_end_s, *supno_s; int_t *xlsub_s, *lsub_s, *xusub_s, *usub_s; int_t *xlsub_n, *lsub_n, *xusub_n, *usub_n; int_t *xsub_s, *sub_s, *xsub_n, *sub_n; int_t *globToLoc, nvtcs_loc; int_t SendCnt_l, SendCnt_u, nnz_loc_l, nnz_loc_u, nnz_loc, RecvCnt_l, RecvCnt_u, ind_loc; int_t i, k, j, gb, szsn, gb_n, gb_s, gb_l, fst_s, fst_s_l, lst_s, i_loc; int_t nelts, isize; float memAux; /* Memory used during this routine and freed on return */ float memRet; /* Memory allocated and not freed on return */ int_t iword, dword; /* ------------------------------------------------------------ INITIALIZATION. 
------------------------------------------------------------*/ iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dist_symbLU()"); #endif nprocs = (int) grid->nprow * grid->npcol; xlsub_s = Pslu_freeable->xlsub; lsub_s = Pslu_freeable->lsub; xusub_s = Pslu_freeable->xusub; usub_s = Pslu_freeable->usub; maxNvtcsPProc = Pslu_freeable->maxNvtcsPProc; globToLoc = Pslu_freeable->globToLoc; nvtcs_loc = Pslu_freeable->nvtcs_loc; xsup_beg_s = Pslu_freeable->xsup_beg_loc; xsup_end_s = Pslu_freeable->xsup_end_loc; supno_s = Pslu_freeable->supno_loc; rcv_luind = NULL; iword = sizeof(int_t); dword = sizeof(double); memAux = 0.; memRet = 0.; mem = intCalloc_dist(12 * nprocs); if (!mem) return (ERROR_RET); memAux = (float) (12 * nprocs * sizeof(int_t)); nnzToRecv = mem; nnzToSend = nnzToRecv + 2*nprocs; nnzToSend_l = nnzToSend + 2 * nprocs; nnzToSend_u = nnzToSend_l + nprocs; send_1 = nnzToSend_u + nprocs; send_2 = send_1 + nprocs; tmp_ptrToSend = send_2 + nprocs; nnzToRecv_l = tmp_ptrToSend + nprocs; nnzToRecv_u = nnzToRecv_l + nprocs; ptrToSend = nnzToSend; ptrToRecv = nnzToSend + nprocs; nvtcs = (int *) SUPERLU_MALLOC(5 * nprocs * sizeof(int)); intBuf1 = nvtcs + nprocs; intBuf2 = nvtcs + 2 * nprocs; intBuf3 = nvtcs + 3 * nprocs; intBuf4 = nvtcs + 4 * nprocs; memAux += 5 * nprocs * sizeof(int); maxszsn = sp_ienv_dist(3); /* Allocate space for storing Glu_persist_n. 
*/ if ( !(supno_n = intMalloc_dist(n+1)) ) { fprintf (stderr, "Malloc fails for supno_n[]."); return (memAux); } memRet += (float) ((n+1) * sizeof(int_t)); /* ------------------------------------------------------------ DETERMINE SUPERNODES FOR NUMERICAL FACTORIZATION ------------------------------------------------------------*/ if (nvtcs_loc > INT_MAX) ABORT("ERROR in dist_symbLU nvtcs_loc > INT_MAX\n"); intNvtcs_loc = (int) nvtcs_loc; MPI_Gather (&intNvtcs_loc, 1, MPI_INT, nvtcs, 1, MPI_INT, 0, grid->comm); if (!iam) { /* set ptrToRecv to point to the beginning of the data for each processor */ for (k = 0, p = 0; p < nprocs; p++) { ptrToRecv[p] = k; k += nvtcs[p]; } } if (nprocs > 1) { temp = NULL; if (!iam ) { if ( !(temp = intMalloc_dist (n+1)) ) { fprintf (stderr, "Malloc fails for temp[]."); return (memAux + memRet); } memAux += (float) (n+1) * iword; } #if defined (_LONGINT) for (p=0; p INT_MAX) ABORT("ERROR in dist_symbLU size to send > INT_MAX\n"); intBuf1[p] = (int) ptrToRecv[p]; } #else /* Default */ intBuf1 = ptrToRecv; #endif MPI_Gatherv (supno_s, (int) nvtcs_loc, mpi_int_t, temp, nvtcs, intBuf1, mpi_int_t, 0, grid->comm); } else temp = supno_s; if (!iam) { nsupers = 0; p = (int) OWNER( globToLoc[0] ); gb = temp[ptrToRecv[p]]; supno_n[0] = nsupers; ptrToRecv[p] ++; szsn = 1; for (j = 1; j < n; j ++) { if (p != (int) OWNER( globToLoc[j] ) || szsn >= maxszsn || gb != temp[ptrToRecv[p]]) { nsupers ++; p = (int) OWNER( globToLoc[j] ); gb = temp[ptrToRecv[p]]; szsn = 1; } else { szsn ++; } ptrToRecv[p] ++; supno_n[j] = nsupers; } nsupers++; if (nprocs > 1) { SUPERLU_FREE (temp); memAux -= (float) (n+1) * iword; } supno_n[n] = nsupers; } /* reset to 0 nnzToSend */ for (p = 0; p < 2 *nprocs; p++) nnzToSend[p] = 0; MPI_Bcast (supno_n, n+1, mpi_int_t, 0, grid->comm); nsupers = supno_n[n]; /* Allocate space for storing Glu_persist_n. 
*/ if ( !(xsup_n = intMalloc_dist(nsupers+1)) ) { fprintf (stderr, "Malloc fails for xsup_n[]."); return (memAux + memRet); } memRet += (float) (nsupers+1) * iword; /* ------------------------------------------------------------ COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS, THEN ALLOCATE SPACE. THIS ACCOUNTS FOR THE FIRST PASS OF L and U. ------------------------------------------------------------*/ gb = EMPTY; for (i = 0; i < n; i++) { if (gb != supno_n[i]) { /* a new supernode starts */ gb = supno_n[i]; xsup_n[gb] = i; } } xsup_n[nsupers] = n; for (p = 0; p < nprocs; p++) { send_1[p] = FALSE; send_2[p] = FALSE; } for (gb_n = 0; gb_n < nsupers; gb_n ++) { i = xsup_n[gb_n]; if (iam == (int) OWNER( globToLoc[i] )) { pc = PCOL( gb_n, grid ); pr = PROW( gb_n, grid ); p_diag = PNUM( pr, pc, grid); i_loc = LOCAL_IND( globToLoc[i] ); gb_s = supno_s[i_loc]; fst_s = xsup_beg_s[gb_s]; lst_s = xsup_end_s[gb_s]; fst_s_l = LOCAL_IND( globToLoc[fst_s] ); for (j = xlsub_s[fst_s_l]; j < xlsub_s[fst_s_l+1]; j++) { k = lsub_s[j]; if (k >= i) { gb = supno_n[k]; p = (int) PNUM( PROW(gb, grid), pc, grid ); nnzToSend[2*p] ++; send_1[p] = TRUE; } } for (j = xusub_s[fst_s_l]; j < xusub_s[fst_s_l+1]; j++) { k = usub_s[j]; if (k >= i + xsup_n[gb_n+1] - xsup_n[gb_n]) { gb = supno_n[k]; p = PNUM( pr, PCOL(gb, grid), grid); nnzToSend[2*p+1] ++; send_2[p] = TRUE; } } nsend_2 = 0; for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++) { nnzToSend[2*p+1] += 2; if (send_2[p]) nsend_2 ++; } for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++) if (send_2[p] || p == p_diag) { if (p == p_diag && !send_2[p]) nnzToSend[2*p+1] += nsend_2; else nnzToSend[2*p+1] += nsend_2-1; send_2[p] = FALSE; } nsend_1 = 0; for (p = pc; p < nprocs; p += grid->npcol) { nnzToSend[2*p] += 2; if (send_1[p]) nsend_1 ++; } for (p = pc; p < nprocs; p += grid->npcol) if (send_1[p]) { nnzToSend[2*p] += nsend_1-1; send_1[p] = FALSE; } else nnzToSend[2*p] += nsend_1; } } /* All-to-all communication */ 
MPI_Alltoall( nnzToSend, 2, mpi_int_t, nnzToRecv, 2, mpi_int_t, grid->comm); nnz_loc_l = nnz_loc_u = 0; SendCnt_l = SendCnt_u = RecvCnt_l = RecvCnt_u = 0; for (p = 0; p < nprocs; p++) { if ( p != iam ) { SendCnt_l += nnzToSend[2*p]; nnzToSend_l[p] = nnzToSend[2*p]; SendCnt_u += nnzToSend[2*p+1]; nnzToSend_u[p] = nnzToSend[2*p+1]; RecvCnt_l += nnzToRecv[2*p]; nnzToRecv_l[p] = nnzToRecv[2*p]; RecvCnt_u += nnzToRecv[2*p+1]; nnzToRecv_u[p] = nnzToRecv[2*p+1]; } else { nnz_loc_l += nnzToRecv[2*p]; nnz_loc_u += nnzToRecv[2*p+1]; nnzToSend_l[p] = 0; nnzToSend_u[p] = 0; nnzToRecv_l[p] = nnzToRecv[2*p]; nnzToRecv_u[p] = nnzToRecv[2*p+1]; } } /* Allocate space for storing the symbolic structure after redistribution. */ nsupers_i = CEILING( nsupers, grid->nprow ); /* Number of local block rows */ nsupers_j = CEILING( nsupers, grid->npcol ); /* Number of local block columns */ if ( !(xlsub_n = intCalloc_dist(nsupers_j+1)) ) { fprintf (stderr, "Malloc fails for xlsub_n[]."); return (memAux + memRet); } memRet += (float) (nsupers_j+1) * iword; if ( !(xusub_n = intCalloc_dist(nsupers_i+1)) ) { fprintf (stderr, "Malloc fails for xusub_n[]."); return (memAux + memRet); } memRet += (float) (nsupers_i+1) * iword; /* Allocate temp storage for sending/receiving the L/U symbolic structure. 
*/ if ( (RecvCnt_l + nnz_loc_l) || (RecvCnt_u + nnz_loc_u) ) { if (!(rcv_luind = intMalloc_dist(SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u))) ) { fprintf (stderr, "Malloc fails for rcv_luind[]."); return (memAux + memRet); } memAux += (float) SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u) * iword; } if ( nprocs > 1 && (SendCnt_l || SendCnt_u) ) { if (!(snd_luind = intMalloc_dist(SUPERLU_MAX(SendCnt_l, SendCnt_u))) ) { fprintf (stderr, "Malloc fails for index[]."); return (memAux + memRet); } memAux += (float) SUPERLU_MAX(SendCnt_l, SendCnt_u) * iword; } /* ------------------------------------------------------------------ LOAD THE SYMBOLIC STRUCTURE OF L AND U INTO THE STRUCTURES TO SEND. THIS ACCOUNTS FOR THE SECOND PASS OF L and U. ------------------------------------------------------------------*/ sendL = TRUE; sendU = FALSE; while (sendL || sendU) { if (sendL) { xsub_s = xlsub_s; sub_s = lsub_s; xsub_n = xlsub_n; nnzToSend = nnzToSend_l; nnzToRecv = nnzToRecv_l; } if (sendU) { xsub_s = xusub_s; sub_s = usub_s; xsub_n = xusub_n; nnzToSend = nnzToSend_u; nnzToRecv = nnzToRecv_u; } for (i = 0, j = 0, p = 0; p < nprocs; p++) { if ( p != iam ) { ptrToSend[p] = i; i += nnzToSend[p]; } ptrToRecv[p] = j; j += nnzToRecv[p]; } nnzToRecv[iam] = 0; ind_loc = ptrToRecv[iam]; for (gb_n = 0; gb_n < nsupers; gb_n++) { nsend_2 = 0; i = xsup_n[gb_n]; if (iam == OWNER( globToLoc[i] )) { pc = PCOL( gb_n, grid ); pr = PROW( gb_n, grid ); p_diag = PNUM( pr, pc, grid ); i_loc = LOCAL_IND( globToLoc[i] ); gb_s = supno_s[i_loc]; fst_s = xsup_beg_s[gb_s]; lst_s = xsup_end_s[gb_s]; fst_s_l = LOCAL_IND( globToLoc[fst_s] ); if (sendL) { p = pc; np = grid->nprow; } else { p = pr * grid->npcol; np = grid->npcol; } for (j = 0; j < np; j++) { if (p == iam) { rcv_luind[ind_loc] = gb_n; rcv_luind[ind_loc+1] = 0; tmp_ptrToSend[p] = ind_loc + 1; ind_loc += 2; } else { snd_luind[ptrToSend[p]] = gb_n; snd_luind[ptrToSend[p]+1] = 0; tmp_ptrToSend[p] = ptrToSend[p] + 1; ptrToSend[p] 
+= 2; } if (sendL) p += grid->npcol; if (sendU) p++; } for (j = xsub_s[fst_s_l]; j < xsub_s[fst_s_l+1]; j++) { k = sub_s[j]; if ((sendL && k >= i) || (sendU && k >= i + xsup_n[gb_n+1] - xsup_n[gb_n])) { gb = supno_n[k]; if (sendL) p = PNUM( PROW(gb, grid), pc, grid ); else p = PNUM( pr, PCOL(gb, grid), grid); if (send_1[p] == FALSE) { send_1[p] = TRUE; send_2[nsend_2] = k; nsend_2 ++; } if (p == iam) { rcv_luind[ind_loc] = k; ind_loc++; if (sendL) xsub_n[LBj( gb_n, grid )] ++; else xsub_n[LBi( gb_n, grid )] ++; } else { snd_luind[ptrToSend[p]] = k; ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++; } } } if (sendL) for (p = pc; p < nprocs; p += grid->npcol) { for (k = 0; k < nsend_2; k++) { gb = supno_n[send_2[k]]; if (PNUM(PROW(gb, grid), pc, grid) != p) { if (p == iam) { rcv_luind[ind_loc] = send_2[k]; ind_loc++; xsub_n[LBj( gb_n, grid )] ++; } else { snd_luind[ptrToSend[p]] = send_2[k]; ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++; } } } send_1[p] = FALSE; } if (sendU) for (p = pr * grid->npcol; p < (pr + 1) * grid->npcol; p++) { if (send_1[p] || p == p_diag) { for (k = 0; k < nsend_2; k++) { gb = supno_n[send_2[k]]; if(PNUM( pr, PCOL(gb, grid), grid) != p) { if (p == iam) { rcv_luind[ind_loc] = send_2[k]; ind_loc++; xsub_n[LBi( gb_n, grid )] ++; } else { snd_luind[ptrToSend[p]] = send_2[k]; ptrToSend[p] ++; snd_luind[tmp_ptrToSend[p]] ++; } } } send_1[p] = FALSE; } } } } /* reset ptrToSnd to point to the beginning of the data for each processor (structure needed in MPI_Alltoallv) */ for (i = 0, p = 0; p < nprocs; p++) { ptrToSend[p] = i; i += nnzToSend[p]; } /* ------------------------------------------------------------ PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION. Note: it uses MPI_Alltoallv. 
------------------------------------------------------------*/ if (nprocs > 1) { #if defined (_LONGINT) nnzToSend[iam] = 0; for (p=0; p INT_MAX || ptrToSend[p] > INT_MAX || nnzToRecv[p] > INT_MAX || ptrToRecv[p] > INT_MAX) ABORT("ERROR in dist_symbLU size to send > INT_MAX\n"); intBuf1[p] = (int) nnzToSend[p]; intBuf2[p] = (int) ptrToSend[p]; intBuf3[p] = (int) nnzToRecv[p]; intBuf4[p] = (int) ptrToRecv[p]; } #else /* Default */ intBuf1 = nnzToSend; intBuf2 = ptrToSend; intBuf3 = nnzToRecv; intBuf4 = ptrToRecv; #endif MPI_Alltoallv (snd_luind, intBuf1, intBuf2, mpi_int_t, rcv_luind, intBuf3, intBuf4, mpi_int_t, grid->comm); } if (sendL) nnzToRecv[iam] = nnz_loc_l; else nnzToRecv[iam] = nnz_loc_u; /* ------------------------------------------------------------ DEALLOCATE TEMPORARY STORAGE. -------------------------------------------------------------*/ if (sendU) if ( nprocs > 1 && (SendCnt_l || SendCnt_u) ) { SUPERLU_FREE (snd_luind); memAux -= (float) SUPERLU_MAX(SendCnt_l, SendCnt_u) * iword; } /* ------------------------------------------------------------ CONVERT THE FORMAT. 
------------------------------------------------------------*/ /* Initialize the array of column of L/ row of U pointers */ k = 0; for (p = 0; p < nprocs; p ++) { if (p != iam) { i = k; while (i < k + nnzToRecv[p]) { gb = rcv_luind[i]; nelts = rcv_luind[i+1]; if (sendL) xsub_n[LBj( gb, grid )] = nelts; else xsub_n[LBi( gb, grid )] = nelts; i += nelts + 2; } } k += nnzToRecv[p]; } if (sendL) j = nsupers_j; else j = nsupers_i; k = 0; isize = xsub_n[0]; xsub_n[0] = 0; for (gb_l = 1; gb_l < j; gb_l++) { k += isize; isize = xsub_n[gb_l]; xsub_n[gb_l] = k; } xsub_n[gb_l] = k + isize; nnz_loc = xsub_n[gb_l]; if (sendL) { lsub_n = NULL; if (nnz_loc) { if ( !(lsub_n = intMalloc_dist(nnz_loc)) ) { fprintf (stderr, "Malloc fails for lsub_n[]."); return (memAux + memRet); } memRet += (float) (nnz_loc * iword); } sub_n = lsub_n; } if (sendU) { usub_n = NULL; if (nnz_loc) { if ( !(usub_n = intMalloc_dist(nnz_loc)) ) { fprintf (stderr, "Malloc fails for usub_n[]."); return (memAux + memRet); } memRet += (float) (nnz_loc * iword); } sub_n = usub_n; } /* Copy the data into the L column / U row oriented storage */ k = 0; for (p = 0; p < nprocs; p++) { i = k; while (i < k + nnzToRecv[p]) { gb = rcv_luind[i]; if (gb >= nsupers) printf ("Pe[%d] p %d gb " IFMT " nsupers " IFMT " i " IFMT " i-k " IFMT "\n", iam, p, gb, nsupers, i, i-k); i += 2; if (sendL) gb_l = LBj( gb, grid ); if (sendU) gb_l = LBi( gb, grid ); for (j = xsub_n[gb_l]; j < xsub_n[gb_l+1]; i++, j++) { sub_n[j] = rcv_luind[i]; } } k += nnzToRecv[p]; } if (sendL) { sendL = FALSE; sendU = TRUE; } else sendU = FALSE; } /* deallocate memory allocated during symbolic factorization routine */ if (rcv_luind != NULL) { SUPERLU_FREE (rcv_luind); memAux -= (float) SUPERLU_MAX(RecvCnt_l+nnz_loc_l, RecvCnt_u+nnz_loc_u) * iword; } SUPERLU_FREE (mem); memAux -= (float) (12 * nprocs * iword); SUPERLU_FREE(nvtcs); memAux -= (float) (5 * nprocs * sizeof(int)); if (xlsub_s != NULL) { SUPERLU_FREE (xlsub_s); SUPERLU_FREE (lsub_s); } if 
(xusub_s != NULL) { SUPERLU_FREE (xusub_s); SUPERLU_FREE (usub_s); } SUPERLU_FREE (globToLoc); if (supno_s != NULL) { SUPERLU_FREE (xsup_beg_s); SUPERLU_FREE (xsup_end_s); SUPERLU_FREE (supno_s); } Glu_persist->supno = supno_n; Glu_persist->xsup = xsup_n; *p_xlsub = xlsub_n; *p_lsub = lsub_n; *p_xusub = xusub_n; *p_usub = usub_n; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit dist_symbLU()"); #endif return (-memRet); } /*! \brief * *
 * Purpose
 * =======
 *   Re-distribute A on the 2D process mesh.  The lower part is
 *   stored using a column format and the upper part
 *   is stored using a row format.
 * 
 * Arguments
 * =========
 * 
 * A      (Input) SuperMatrix*
 *	  The distributed input matrix A of dimension (A->nrow, A->ncol).
 *        The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
 *
 * ScalePermstruct (Input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * Glu_persist  (Input) Glu_persist_t *
 *        Information on supernodes mapping.
 * 
 * grid   (Input) gridinfo_t*
 *        The 2D process mesh.
 *
 * p_ainf_colptr (Output) int_t**
 *         Pointer to the lower part of A distributed on a 2D grid 
 *         of processors, stored by columns.
 *
 * p_ainf_rowind (Output) int_t**
 *         Structure of the lower part of A distributed on a 
 *         2D grid of processors, stored by columns.
 *
 * p_ainf_val    (Output) double**
 *         Numerical values of the lower part of A, distributed on a 
 *         2D grid of processors, stored by columns.
 *
 * p_asup_rowptr (Output) int_t**
 *         Pointer to the upper part of A distributed on a 2D grid 
 *         of processors, stored by rows.
 *
 * p_asup_colind (Output) int_t**
 *         Structure of the upper part of A distributed on a 
 *         2D grid of processors, stored by rows.
 *
 * p_asup_val    (Output) double**
 *         Numerical values of the upper part of A, distributed on a 
 *         2D grid of processors, stored by rows.
 *
 * ilsum_i  (Input) int_t *
 *       Starting position of each supernode in 
 *       the full array (local, block row wise).
 *
 * ilsum_j  (Input) int_t *
 *       Starting position of each supernode in 
 *       the full array (local, block column wise).
 *
 * Return value
 * ============
 *   < 0, number of bytes allocated on return from ddist_A
 *   > 0, number of bytes allocated when out of memory.
 *        (an approximation).
 * 
*/
static float
ddist_A(SuperMatrix *A, ScalePermstruct_t *ScalePermstruct,
        Glu_persist_t *Glu_persist, gridinfo_t *grid,
        int_t **p_ainf_colptr, int_t **p_ainf_rowind, double **p_ainf_val,
        int_t **p_asup_rowptr, int_t **p_asup_colind, double **p_asup_val,
        int_t *ilsum_i, int_t *ilsum_j
        )
{
    int    iam, p, procs;
    NRformat_loc *Astore;
    int_t  *perm_r; /* row permutation vector */
    int_t  *perm_c; /* column permutation vector */
    int_t  i, it, irow, fst_row, j, jcol, k, gbi, gbj, n, m_loc, jsize, isize;
    int_t  nsupers, nsupers_i, nsupers_j;
    int_t  nnz_loc, nnz_loc_ainf, nnz_loc_asup;    /* number of local nonzeros */
    int_t  SendCnt; /* number of remote nonzeros to be sent */
    int_t  RecvCnt; /* number of remote nonzeros to be received */
    int_t *ainf_colptr, *ainf_rowind, *asup_rowptr, *asup_colind;
    double *asup_val, *ainf_val;
    int_t  *nnzToSend, *nnzToRecv, maxnnzToRecv;
    /* The buffers below that start as NULL are only allocated and used when
       procs > 1. */
    int_t  *ia, *ja, **ia_send = NULL, *index = NULL, *itemp = NULL;
    int_t  *ptr_to_send = NULL;
    double *aij, **aij_send = NULL, *nzval = NULL, *dtemp = NULL;
    double *nzval_a;
    MPI_Request *send_req = NULL;
    MPI_Status  status;
    /* xsup and supno are read by the BlockNum / FstBlockC macros. */
    int_t *xsup = Glu_persist->xsup;    /* supernode and column mapping */
    int_t *supno = Glu_persist->supno;
    float memAux;  /* Memory used during this routine and freed on return */
    float memRet;  /* Memory allocated and not freed on return */
    int_t iword, dword, szbuf;

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    iam = grid->iam;
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter ddist_A()");
#endif
    iword = sizeof(int_t);
    dword = sizeof(double);

    perm_r = ScalePermstruct->perm_r;
    perm_c = ScalePermstruct->perm_c;
    procs = grid->nprow * grid->npcol;
    Astore = (NRformat_loc *) A->Store;
    n = A->ncol;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    /* nnzToRecv and nnzToSend share one zeroed allocation. */
    if (!(nnzToRecv = intCalloc_dist(2*procs))) {
        fprintf (stderr, "Malloc fails for nnzToRecv[].");
        return (ERROR_RET);
    }
    memAux = (float) (2 * procs * iword);
    memRet = 0.;
    nnzToSend = nnzToRecv + procs;
    nsupers  = supno[n-1] + 1;

    /* ------------------------------------------------------------
       COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS,
       THEN ALLOCATE SPACE.
       THIS ACCOUNTS FOR THE FIRST PASS OF A.
       ------------------------------------------------------------*/
    for (i = 0; i < m_loc; ++i) {
        /* The permuted row index depends on i only. */
        irow = perm_c[perm_r[i+fst_row]];  /* Row number in Pc*Pr*A */
        gbi = BlockNum( irow );
        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {
            jcol = Astore->colind[j];
            gbj = BlockNum( jcol );
            p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid );
            ++nnzToSend[p];
        }
    }

    /* All-to-all communication */
    MPI_Alltoall( nnzToSend, 1, mpi_int_t, nnzToRecv, 1, mpi_int_t,
                  grid->comm);

    maxnnzToRecv = 0;
    nnz_loc = SendCnt = RecvCnt = 0;

    for (p = 0; p < procs; ++p) {
        if ( p != iam ) {
            SendCnt += nnzToSend[p];
            RecvCnt += nnzToRecv[p];
            maxnnzToRecv = SUPERLU_MAX( nnzToRecv[p], maxnnzToRecv );
        } else {
            nnz_loc += nnzToRecv[p];
            /*assert(nnzToSend[p] == nnzToRecv[p]);*/
        }
    }
    k = nnz_loc + RecvCnt; /* Total nonzeros ended up in my process. */
    szbuf = k;

    /* Allocate space for storing the triplets after redistribution. */
    if ( !(ia = intMalloc_dist(2*k)) ) {
        fprintf (stderr, "Malloc fails for ia[].");
        return (memAux);
    }
    memAux += (float) (2*k*iword);
    ja = ia + k;
    if ( !(aij = doubleMalloc_dist(k)) ) {
        fprintf (stderr, "Malloc fails for aij[].");
        return (memAux);
    }
    memAux += (float) (k*dword);

    /* Allocate temporary storage for sending/receiving the A triplets. */
    if ( procs > 1 ) {
        if ( !(send_req = (MPI_Request *)
               SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))) ) {
            fprintf (stderr, "Malloc fails for send_req[].");
            return (memAux);
        }
        memAux += (float) (2*procs *sizeof(MPI_Request));
        if ( !(ia_send = (int_t **) SUPERLU_MALLOC(procs*sizeof(int_t*))) ) {
            fprintf(stderr, "Malloc fails for ia_send[].");
            return (memAux);
        }
        memAux += (float) (procs*sizeof(int_t*));
        if ( !(aij_send = (double **)SUPERLU_MALLOC(procs*sizeof(double*))) ) {
            fprintf(stderr, "Malloc fails for aij_send[].");
            return (memAux);
        }
        memAux += (float) (procs*sizeof(double*));
        if ( !(index = intMalloc_dist(2*SendCnt)) ) {
            fprintf(stderr, "Malloc fails for index[].");
            return (memAux);
        }
        memAux += (float) (2*SendCnt*iword);
        if ( !(nzval = doubleMalloc_dist(SendCnt)) ) {
            fprintf(stderr, "Malloc fails for nzval[].");
            return (memAux);
        }
        memAux += (float) (SendCnt * dword);
        if ( !(ptr_to_send = intCalloc_dist(procs)) ) {
            fprintf(stderr, "Malloc fails for ptr_to_send[].");
            return (memAux);
        }
        memAux += (float) (procs * iword);
        if ( !(itemp = intMalloc_dist(2*maxnnzToRecv)) ) {
            fprintf(stderr, "Malloc fails for itemp[].");
            return (memAux);
        }
        memAux += (float) (2*maxnnzToRecv*iword);
        if ( !(dtemp = doubleMalloc_dist(maxnnzToRecv)) ) {
            fprintf(stderr, "Malloc fails for dtemp[].");
            return (memAux);
        }
        memAux += (float) (maxnnzToRecv * dword);

        for (i = 0, j = 0, p = 0; p < procs; ++p) {
            if ( p != iam ) {
                /* Each destination gets 2*nnzToSend[p] index slots: the row
                   indices first, then the column indices. */
                ia_send[p] = &index[i];
                i += 2 * nnzToSend[p];
                aij_send[p] = &nzval[j];
                j += nnzToSend[p];
            }
        }
    } /* if procs > 1 */

    nsupers_i = CEILING( nsupers, grid->nprow ); /* Number of local block rows */
    nsupers_j = CEILING( nsupers, grid->npcol ); /* Number of local block columns */
    if ( !(ainf_colptr = intCalloc_dist(ilsum_j[nsupers_j] + 1)) ) {
        fprintf (stderr, "Malloc fails for *ainf_colptr[].");
        return (memAux);
    }
    memRet += (float) (ilsum_j[nsupers_j] + 1) * iword;
    if ( !(asup_rowptr = intCalloc_dist(ilsum_i[nsupers_i] + 1)) ) {
        fprintf (stderr, "Malloc fails for *asup_rowptr[].");
        return (memAux+memRet);
    }
    memRet += (float) (ilsum_i[nsupers_i] + 1) * iword;

    /* ------------------------------------------------------------
       LOAD THE ENTRIES OF A INTO THE (IA,JA,AIJ) STRUCTURES TO SEND.
       THIS ACCOUNTS FOR THE SECOND PASS OF A.
       ------------------------------------------------------------*/
    nnz_loc = 0; /* Reset the local nonzero count. */
    nnz_loc_ainf = nnz_loc_asup = 0;
    nzval_a = Astore->nzval;
    for (i = 0; i < m_loc; ++i) {
        irow = perm_c[perm_r[i+fst_row]];  /* Row number in Pc*Pr*A */
        gbi = BlockNum( irow );
        for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {
            jcol = Astore->colind[j];
            gbj = BlockNum( jcol );
            p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid );

            if ( p != iam ) { /* remote */
                k = ptr_to_send[p];
                ia_send[p][k] = irow;
                ia_send[p][k + nnzToSend[p]] = jcol;
                aij_send[p][k] = nzval_a[j];
                ++ptr_to_send[p];
            } else {          /* local */
                ia[nnz_loc] = irow;
                ja[nnz_loc] = jcol;
                aij[nnz_loc] = nzval_a[j];
                ++nnz_loc;
                /* Count nonzeros in each column of L / row of U */
                if (gbi >= gbj) {
                    ainf_colptr[ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj )] ++;
                    nnz_loc_ainf ++;
                }
                else {
                    asup_rowptr[ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi )] ++;
                    nnz_loc_asup ++;
                }
            }
        }
    }

    /* ------------------------------------------------------------
       PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION.
       NOTE: Can possibly use MPI_Alltoallv.
       ------------------------------------------------------------*/
    for (p = 0; p < procs; ++p) {
        if ( p != iam ) {
#if defined (_LONGINT)
            /* MPI counts are int: refuse sizes that would be truncated. */
            if (2*nnzToSend[p] > INT_MAX)
                ABORT("ERROR in ddist_A size to send > INT_MAX\n");
#endif
            it = 2*nnzToSend[p];
            MPI_Isend( ia_send[p], it, mpi_int_t,
                       p, iam, grid->comm, &send_req[p] );
            it = nnzToSend[p];
            MPI_Isend( aij_send[p], it, MPI_DOUBLE,
                       p, iam+procs, grid->comm, &send_req[procs+p] );
        }
    }

    for (p = 0; p < procs; ++p) {
        if ( p != iam ) {
#if defined (_LONGINT)
            if (2*nnzToRecv[p] > INT_MAX)
                ABORT("ERROR in ddist_A size to receive > INT_MAX\n");
#endif
            it = 2*nnzToRecv[p];
            MPI_Recv( itemp, it, mpi_int_t, p, p, grid->comm, &status );
            it = nnzToRecv[p];
            MPI_Recv( dtemp, it, MPI_DOUBLE, p, p+procs,
                      grid->comm, &status );
            for (i = 0; i < nnzToRecv[p]; ++i) {
                ia[nnz_loc] = itemp[i];
                irow = itemp[i];
                jcol = itemp[i + nnzToRecv[p]];
                /* assert(jcol<n); */
                ja[nnz_loc] = jcol;
                aij[nnz_loc] = dtemp[i];
                ++nnz_loc;

                gbi = BlockNum( irow );
                gbj = BlockNum( jcol );
                /* Count nonzeros in each column of L / row of U */
                if (gbi >= gbj) {
                    ainf_colptr[ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj )] ++;
                    nnz_loc_ainf ++;
                }
                else {
                    asup_rowptr[ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi )] ++;
                    nnz_loc_asup ++;
                }
            }
        }
    }

    /* The send buffers may only be released once every send has completed. */
    for (p = 0; p < procs; ++p) {
        if ( p != iam ) {
            MPI_Wait( &send_req[p], &status);
            MPI_Wait( &send_req[procs+p], &status);
        }
    }

    /* ------------------------------------------------------------
       DEALLOCATE TEMPORARY STORAGE
       ------------------------------------------------------------*/

    SUPERLU_FREE(nnzToRecv);
    memAux -= 2 * procs * iword;
    if ( procs > 1 ) {
        SUPERLU_FREE(send_req);
        SUPERLU_FREE(ia_send);
        SUPERLU_FREE(aij_send);
        SUPERLU_FREE(index);
        SUPERLU_FREE(nzval);
        SUPERLU_FREE(ptr_to_send);
        SUPERLU_FREE(itemp);
        SUPERLU_FREE(dtemp);
        memAux -= 2*procs *sizeof(MPI_Request) + procs*sizeof(int_t*) +
            procs*sizeof(double*) + 2*SendCnt * iword +
            SendCnt* dword + procs*iword +
            2*maxnnzToRecv*iword + maxnnzToRecv*dword;
    }

    /* ------------------------------------------------------------
       CONVERT THE TRIPLET FORMAT.
       ------------------------------------------------------------*/
    if (nnz_loc_ainf != 0) {
        if ( !(ainf_rowind = intMalloc_dist(nnz_loc_ainf)) ) {
            fprintf (stderr, "Malloc fails for *ainf_rowind[].");
            return (memAux+memRet);
        }
        memRet += (float) (nnz_loc_ainf * iword);
        if ( !(ainf_val = doubleMalloc_dist(nnz_loc_ainf)) ) {
            fprintf (stderr, "Malloc fails for *ainf_val[].");
            return (memAux+memRet);
        }
        memRet += (float) (nnz_loc_ainf * dword);
    }
    else {
        ainf_rowind = NULL;
        ainf_val = NULL;
    }
    if (nnz_loc_asup != 0) {
        if ( !(asup_colind = intMalloc_dist(nnz_loc_asup)) ) {
            fprintf (stderr, "Malloc fails for *asup_colind[].");
            return (memAux + memRet);
        }
        memRet += (float) (nnz_loc_asup * iword);
        if ( !(asup_val = doubleMalloc_dist(nnz_loc_asup)) ) {
            fprintf (stderr, "Malloc fails for *asup_val[].");
            return (memAux + memRet);
        }
        memRet += (float) (nnz_loc_asup * dword);
    }
    else {
        asup_colind = NULL;
        asup_val = NULL;
    }

    /* Initialize the array of column pointers */
    k = 0;
    jsize = ainf_colptr[0];  ainf_colptr[0] = 0;
    for (j = 1; j < ilsum_j[nsupers_j]; j++) {
        k += jsize;
        jsize = ainf_colptr[j];
        ainf_colptr[j] = k;
    }
    ainf_colptr[ilsum_j[nsupers_j]] = k + jsize;
    /* Same for the row pointers of the upper part. */
    i = 0;
    isize = asup_rowptr[0];  asup_rowptr[0] = 0;
    for (j = 1; j < ilsum_i[nsupers_i]; j++) {
        i += isize;
        isize = asup_rowptr[j];
        asup_rowptr[j] = i;
    }
    asup_rowptr[ilsum_i[nsupers_i]] = i + isize;

    /* Copy the triplets into the column oriented storage */
    for (i = 0; i < nnz_loc; ++i) {
        jcol = ja[i];
        irow = ia[i];
        gbi = BlockNum( irow );
        gbj = BlockNum( jcol );
        /* Each pointer is advanced as its column / row is filled. */
        if (gbi >= gbj) {
            j = ilsum_j[LBj( gbj, grid )] + jcol - FstBlockC( gbj );
            k = ainf_colptr[j];
            ainf_rowind[k] = irow;
            ainf_val[k] = aij[i];
            ainf_colptr[j] ++;
        }
        else {
            j = ilsum_i[LBi( gbi, grid )] + irow - FstBlockC( gbi );
            k = asup_rowptr[j];
            asup_colind[k] = jcol;
            asup_val[k] = aij[i];
            asup_rowptr[j] ++;
        }
    }

    /* Reset the column pointers to the beginning of each column */
    for (j = ilsum_j[nsupers_j]; j > 0; j--)
        ainf_colptr[j] = ainf_colptr[j-1];
    for (j = ilsum_i[nsupers_i]; j > 0; j--)
        asup_rowptr[j] = asup_rowptr[j-1];
    ainf_colptr[0] = 0;
    asup_rowptr[0] = 0;

    /* ja lives in the same allocation as ia. */
    SUPERLU_FREE(ia);
    SUPERLU_FREE(aij);
    memAux -= 2*szbuf*iword + szbuf*dword;

    *p_ainf_colptr = ainf_colptr;
    *p_ainf_rowind = ainf_rowind;
    *p_ainf_val    = ainf_val;
    *p_asup_rowptr = asup_rowptr;
    *p_asup_colind = asup_colind;
    *p_asup_val    = asup_val;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit ddist_A()");
    fprintf (stdout, "Size of allocated memory (MB) %.3f\n", memRet*1e-6);
#endif

    return (-memRet);
} /* ddist_A */
/*! \brief * *
 * Purpose
 * =======
 *   Distribute the input matrix onto the 2D process mesh.
 * 
 * Arguments
 * =========
 * 
 * fact (input) fact_t
 *        Specifies whether or not the L and U structures will be re-used.
 *        = SamePattern_SameRowPerm: L and U structures are input, and
 *                                   unchanged on exit.
 *          This routine must not be called in this case: it aborts with
 *          an error.  The pddistribute routine should be called instead.
 *        = DOFACT or SamePattern: L and U structures are computed and output.
 *
 * n      (Input) int
 *        Dimension of the matrix.
 *
 * A      (Input) SuperMatrix*
 *	  The distributed input matrix A of dimension (A->nrow, A->ncol).
 *        A may be overwritten by diag(R)*A*diag(C)*Pc^T.
 *        The type of A can be: Stype = NR; Dtype = SLU_D; Mtype = GE.
 *
 * ScalePermstruct (Input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * Pslu_freeable (Input) Pslu_freeable_t*
 *        The global structure describing the graph of L and U.
 * 
 * LUstruct (Input/Output) LUstruct_t*
 *        Data structures for L and U factors.  On exit, the fields of
 *        LUstruct->Llu are set to the distributed L and U data structures.
 *
 * grid   (Input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Return value
 * ============
 *   < 0, on success: minus the number of bytes allocated (this includes
 *       the bytes reported by dist_symbLU and by the distribution of A).
 *   > 0, number of bytes allocated for performing the distribution
 *       of the data, when out of memory.
 *        (an approximation).
 * 
*/ float ddist_psymbtonum(fact_t fact, int_t n, SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, Pslu_freeable_t *Pslu_freeable, LUstruct_t *LUstruct, gridinfo_t *grid) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; Glu_freeable_t Glu_freeable_n; LocalLU_t *Llu = LUstruct->Llu; int_t bnnz, fsupc, i, irow, istart, j, jb, jj, k, len, len1, nsupc, nsupc_gb, ii, nprocs; int_t ljb; /* local block column number */ int_t nrbl; /* number of L blocks in current block column */ int_t nrbu; /* number of U blocks in current block column */ int_t gb; /* global block number; 0 < gb <= nsuper */ int_t lb; /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */ int iam, jbrow, jbcol, jcol, kcol, mycol, myrow, pc, pr, ljb_i, ljb_j, p; int_t mybufmax[NBUFFERS]; NRformat_loc *Astore; double *a; int_t *asub, *xa; int_t *ainf_colptr, *ainf_rowind, *asup_rowptr, *asup_colind; double *asup_val, *ainf_val; int_t *xsup, *supno; /* supernode and column mapping */ int_t *lsub, *xlsub, *usub, *xusub; int_t nsupers, nsupers_i, nsupers_j, nsupers_ij; int_t next_ind; /* next available position in index[*] */ int_t next_val; /* next available position in nzval[*] */ int_t *index; /* indices consist of headers and row subscripts */ int *index1; /* temporary pointer to array of int */ double *lusup, *uval; /* nonzero values in L and U */ int_t *recvBuf; int *ptrToRecv, *nnzToRecv, *ptrToSend, *nnzToSend; double **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc) */ int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */ double **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */ int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */ /*-- Counts to be used in factorization. --*/ int *ToRecv, *ToSendD, **ToSendR; /*-- Counts to be used in lower triangular solve. --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist; /* Column process list to send down Xk. */ int_t nfrecvx = 0; /* Number of Xk I will receive. 
*/ int_t nfsendx = 0; /* Number of Xk I will send */ int_t kseen; /*-- Counts to be used in upper triangular solve. --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist; /* Column process list to send down Xk. */ int_t nbrecvx = 0; /* Number of Xk I will receive. */ int_t nbsendx = 0; /* Number of Xk I will send */ int_t *ilsum; /* starting position of each supernode in the full array (local) */ int_t *ilsum_j, ldaspa_j; /* starting position of each supernode in the full array (local, block column wise) */ /*-- Auxiliary arrays; freed on return --*/ int_t *Urb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ int_t *LUb_length; /* L,U block length; size nsupers_ij */ int_t *LUb_indptr; /* pointers to L,U index[]; size nsupers_ij */ int_t *LUb_number; /* global block number; size nsupers_ij */ int_t *LUb_valptr; /* pointers to U nzval[]; size ceil(NSUPERS/Pc) */ int_t *Lrb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ double *dense, *dense_col; /* SPA */ double zero = 0.0; int_t ldaspa; /* LDA of SPA */ int_t iword, dword; float memStrLU, memA, memDist = 0.; /* memory used for redistributing the data, which does not include the memory for the numerical values of L and U (positive number)*/ float memNLU = 0.; /* memory allocated for storing the numerical values of L and U, that will be used in the numeric factorization (positive number) */ #if ( PRNTlevel>=1 ) int_t nLblocks = 0, nUblocks = 0; #endif /* Initialization. 
*/ iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dist_psymbtonum()"); #endif myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nprocs = grid->npcol * grid->nprow; for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0; Astore = (NRformat_loc *) A->Store; iword = sizeof(int_t); dword = sizeof(double); if (fact == SamePattern_SameRowPerm) { ABORT ("ERROR: call of dist_psymbtonum with fact equals SamePattern_SameRowPerm."); } if ((memStrLU = dist_symbLU (n, Pslu_freeable, Glu_persist, &xlsub, &lsub, &xusub, &usub, grid)) > 0) return (memStrLU); memDist += (-memStrLU); xsup = Glu_persist->xsup; /* supernode and column mapping */ supno = Glu_persist->supno; nsupers = supno[n-1] + 1; nsupers_i = CEILING( nsupers, grid->nprow );/* No of local row blocks */ nsupers_j = CEILING( nsupers, grid->npcol );/* No of local column blocks */ nsupers_ij = SUPERLU_MAX(nsupers_i, nsupers_j); if ( !(ilsum = intMalloc_dist(nsupers_i+1)) ) { fprintf (stderr, "Malloc fails for ilsum[]."); return (memDist + memNLU); } memNLU += (nsupers_i+1) * iword; if ( !(ilsum_j = intMalloc_dist(nsupers_j+1)) ) { fprintf (stderr, "Malloc fails for ilsum_j[]."); return (memDist + memNLU); } memDist += (nsupers_j+1) * iword; /* Compute ldaspa and ilsum[], ldaspa_j and ilsum_j[]. 
*/ ilsum[0] = 0; ldaspa = 0; for (gb = 0; gb < nsupers; gb++) if ( myrow == PROW( gb, grid ) ) { i = SuperSize( gb ); ldaspa += i; lb = LBi( gb, grid ); ilsum[lb + 1] = ilsum[lb] + i; } ilsum[nsupers_i] = ldaspa; ldaspa_j = 0; ilsum_j[0] = 0; for (gb = 0; gb < nsupers; gb++) if (mycol == PCOL( gb, grid )) { i = SuperSize( gb ); ldaspa_j += i; lb = LBj( gb, grid ); ilsum_j[lb + 1] = ilsum_j[lb] + i; } ilsum_j[nsupers_j] = ldaspa_j; if ((memA = ddist_A(A, ScalePermstruct, Glu_persist, grid, &ainf_colptr, &ainf_rowind, &ainf_val, &asup_rowptr, &asup_colind, &asup_val, ilsum, ilsum_j)) > 0) return (memDist + memA + memNLU); memDist += (-memA); /* ------------------------------------------------------------ FIRST TIME CREATING THE L AND U DATA STRUCTURES. ------------------------------------------------------------*/ /* We first need to set up the L and U data structures and then * propagate the values of A into them. */ if ( !(ToRecv = SUPERLU_MALLOC(nsupers * sizeof(int))) ) { fprintf(stderr, "Calloc fails for ToRecv[]."); return (memDist + memNLU); } for (i = 0; i < nsupers; ++i) ToRecv[i] = 0; memNLU += nsupers * iword; k = CEILING( nsupers, grid->npcol ); /* Number of local column blocks */ if ( !(ToSendR = (int **) SUPERLU_MALLOC(k*sizeof(int*))) ) { fprintf(stderr, "Malloc fails for ToSendR[]."); return (memDist + memNLU); } memNLU += k*sizeof(int_t*); j = k * grid->npcol; if ( !(index1 = SUPERLU_MALLOC(j * sizeof(int))) ) { fprintf(stderr, "Malloc fails for index[]."); return (memDist + memNLU); } memNLU += j*iword; for (i = 0; i < j; ++i) index1[i] = EMPTY; for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index1[j]; /* Auxiliary arrays used to set up L and U block data structures. They are freed on return. 
*/ if ( !(LUb_length = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_length[]."); return (memDist + memNLU); } if ( !(LUb_indptr = intMalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Malloc fails for LUb_indptr[]."); return (memDist + memNLU); } if ( !(LUb_number = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_number[]."); return (memDist + memNLU); } if ( !(LUb_valptr = intCalloc_dist(nsupers_ij)) ) { fprintf(stderr, "Calloc fails for LUb_valptr[]."); return (memDist + memNLU); } memDist += 4 * nsupers_ij * iword; k = CEILING( nsupers, grid->nprow ); /* Pointers to the beginning of each block row of U. */ if ( !(Unzval_br_ptr = (double**)SUPERLU_MALLOC(nsupers_i * sizeof(double*))) ) { fprintf(stderr, "Malloc fails for Unzval_br_ptr[]."); return (memDist + memNLU); } if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(nsupers_i * sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for Ufstnz_br_ptr[]."); return (memDist + memNLU); } memNLU += nsupers_i*sizeof(double*) + nsupers_i*sizeof(int_t*); Unzval_br_ptr[nsupers_i-1] = NULL; Ufstnz_br_ptr[nsupers_i-1] = NULL; if ( !(ToSendD = SUPERLU_MALLOC(nsupers_i * sizeof(int))) ) { fprintf(stderr, "Malloc fails for ToSendD[]."); return (memDist + memNLU); } for (i = 0; i < nsupers_i; ++i) ToSendD[i] = NO; memNLU += nsupers_i*iword; if ( !(Urb_marker = intCalloc_dist(nsupers_j))) { fprintf(stderr, "Calloc fails for rb_marker[]."); return (memDist + memNLU); } if ( !(Lrb_marker = intCalloc_dist( nsupers_i ))) { fprintf(stderr, "Calloc fails for rb_marker[]."); return (memDist + memNLU); } memDist += (nsupers_i + nsupers_j)*iword; /* Auxiliary arrays used to set up L, U block data structures. They are freed on return. k is the number of local row blocks. */ if ( !(dense = doubleCalloc_dist(SUPERLU_MAX(ldaspa, ldaspa_j) * sp_ienv_dist(3))) ) { fprintf(stderr, "Calloc fails for SPA dense[]."); return (memDist + memNLU); } /* These counts will be used for triangular solves. 
*/ if ( !(fmod = intCalloc_dist(nsupers_i)) ) { fprintf(stderr, "Calloc fails for fmod[]."); return (memDist + memNLU); } if ( !(bmod = intCalloc_dist(nsupers_i)) ) { fprintf(stderr, "Calloc fails for bmod[]."); return (memDist + memNLU); } /* ------------------------------------------------ */ memNLU += 2*nsupers_i*iword + SUPERLU_MAX(ldaspa, ldaspa_j)*sp_ienv_dist(3)*dword; /* Pointers to the beginning of each block column of L. */ if ( !(Lnzval_bc_ptr = (double**)SUPERLU_MALLOC(nsupers_j * sizeof(double*))) ) { fprintf(stderr, "Malloc fails for Lnzval_bc_ptr[]."); return (memDist + memNLU); } if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(nsupers_j * sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for Lrowind_bc_ptr[]."); return (memDist + memNLU); } memNLU += nsupers_j * sizeof(double*) + nsupers_j * sizeof(int_t*); Lnzval_bc_ptr[nsupers_j-1] = NULL; Lrowind_bc_ptr[nsupers_j-1] = NULL; /* These lists of processes will be used for triangular solves. */ if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(nsupers_j*sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for fsendx_plist[]."); return (memDist + memNLU); } len = nsupers_j * grid->nprow; if ( !(index = intMalloc_dist(len)) ) { fprintf(stderr, "Malloc fails for fsendx_plist[0]"); return (memDist + memNLU); } for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < nsupers_j; ++i, j += grid->nprow) fsendx_plist[i] = &index[j]; if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(nsupers_j*sizeof(int_t*))) ) { fprintf(stderr, "Malloc fails for bsendx_plist[]."); return (memDist + memNLU); } if ( !(index = intMalloc_dist(len)) ) { fprintf(stderr, "Malloc fails for bsendx_plist[0]"); return (memDist + memNLU); } for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < nsupers_j; ++i, j += grid->nprow) bsendx_plist[i] = &index[j]; /* -------------------------------------------------------------- */ memNLU += 2*nsupers_j*sizeof(int_t*) + 2*len*iword; 
/*------------------------------------------------------------ PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U. ------------------------------------------------------------*/ for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); ljb_j = LBj( jb, grid ); /* Local block number column wise */ ljb_i = LBi( jb, grid); /* Local block number row wise */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); if ( myrow == jbrow ) { /* Block row jb in my process row */ /* Scatter A into SPA. */ for (j = ilsum[ljb_i], dense_col = dense; j < ilsum[ljb_i]+nsupc; j++) { for (i = asup_rowptr[j]; i < asup_rowptr[j+1]; i++) { if (i >= asup_rowptr[ilsum[nsupers_i]]) printf ("ERR7\n"); jcol = asup_colind[i]; if (jcol >= n) printf ("Pe[%d] ERR distsn jb " IFMT " gb " IFMT " j " IFMT " jcol %d\n", iam, jb, gb, j, jcol); gb = BlockNum( jcol ); lb = LBj( gb, grid ); if (gb >= nsupers || lb >= nsupers_j) printf ("ERR8\n"); jcol = ilsum_j[lb] + jcol - FstBlockC( gb ); if (jcol >= ldaspa_j) printf ("Pe[%d] ERR1 jb " IFMT " gb " IFMT " j " IFMT " jcol %d\n", iam, jb, gb, j, jcol); dense_col[jcol] = asup_val[i]; } dense_col += ldaspa_j; } /*------------------------------------------------ * SET UP U BLOCKS. *------------------------------------------------*/ /* Count number of blocks and length of each block. */ nrbu = 0; len = 0; /* Number of column subscripts I own. 
*/ len1 = 0; /* number of fstnz subscripts */ for (i = xusub[ljb_i]; i < xusub[ljb_i+1]; i++) { if (i >= xusub[nsupers_i]) printf ("ERR10\n"); jcol = usub[i]; gb = BlockNum( jcol ); /* Global block number */ /*if (fsupc <= 146445 && 146445 < fsupc + nsupc && jcol == 397986) printf ("Pe[%d] [%d %d] elt [%d] jbcol %d pc %d\n", iam, jb, gb, jcol, jbcol, pc); */ lb = LBj( gb, grid ); /* Local block number */ pc = PCOL( gb, grid ); /* Process col owning this block */ if (mycol == jbcol) ToSendR[ljb_j][pc] = YES; /* if (mycol == jbcol && mycol != pc) ToSendR[ljb_j][pc] = YES; */ pr = PROW( gb, grid ); if ( pr != jbrow && mycol == pc) bsendx_plist[lb][jbrow] = YES; if (mycol == pc) { len += nsupc; LUb_length[lb] += nsupc; ToSendD[ljb_i] = YES; if (Urb_marker[lb] <= jb) { /* First see this block */ if (Urb_marker[lb] == FALSE && gb != jb && myrow != pr) nbrecvx ++; Urb_marker[lb] = jb + 1; LUb_number[nrbu] = gb; /* if (gb == 391825 && jb == 145361) printf ("Pe[%d] T1 [%d %d] nrbu %d \n", iam, jb, gb, nrbu); */ nrbu ++; len1 += SuperSize( gb ); if ( gb != jb )/* Exclude diagonal block. */ ++bmod[ljb_i];/* Mod. count for back solve */ #if ( PRNTlevel>=1 ) ++nUblocks; #endif } } } /* for i ... */ if ( nrbu ) { /* Sort the blocks of U in increasing block column index. SuperLU_DIST assumes this is true */ /* simple insert sort algorithm */ /* to be transformed in quick sort */ for (j = 1; j < nrbu; j++) { k = LUb_number[j]; for (i=j-1; i>=0 && LUb_number[i] > k; i--) { LUb_number[i+1] = LUb_number[i]; } LUb_number[i+1] = k; } /* Set up the initial pointers for each block in index[] and nzval[]. 
*/ /* Add room for descriptors */ len1 += BR_HEADER + nrbu * UB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1+1)) ) { fprintf (stderr, "Malloc fails for Uindex[]"); return (memDist + memNLU); } Ufstnz_br_ptr[ljb_i] = index; if (!(Unzval_br_ptr[ljb_i] = doubleMalloc_dist(len))) { fprintf (stderr, "Malloc fails for Unzval_br_ptr[*][]"); return (memDist + memNLU); } memNLU += (len1+1)*iword + len*dword; uval = Unzval_br_ptr[ljb_i]; mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 ); mybufmax[3] = SUPERLU_MAX( mybufmax[3], len ); index[0] = nrbu; /* Number of column blocks */ index[1] = len; /* Total length of nzval[] */ index[2] = len1; /* Total length of index */ index[len1] = -1; /* End marker */ next_ind = BR_HEADER; next_val = 0; for (k = 0; k < nrbu; k++) { gb = LUb_number[k]; lb = LBj( gb, grid ); len = LUb_length[lb]; LUb_length[lb] = 0; /* Reset vector of block length */ index[next_ind++] = gb; /* Descriptor */ index[next_ind++] = len; LUb_indptr[lb] = next_ind; for (; next_ind < LUb_indptr[lb] + SuperSize( gb ); next_ind++) index[next_ind] = FstBlockC( jb + 1 ); LUb_valptr[lb] = next_val; next_val += len; } /* Propagate the fstnz subscripts to Ufstnz_br_ptr[], and the initial values of A from SPA into Unzval_br_ptr[]. */ for (i = xusub[ljb_i]; i < xusub[ljb_i+1]; i++) { jcol = usub[i]; gb = BlockNum( jcol ); if ( mycol == PCOL( gb, grid ) ) { lb = LBj( gb, grid ); k = LUb_indptr[lb]; /* Start fstnz in index */ index[k + jcol - FstBlockC( gb )] = FstBlockC( jb ); } } /* for i ... */ for (i = 0; i < nrbu; i++) { gb = LUb_number[i]; lb = LBj( gb, grid ); next_ind = LUb_indptr[lb]; k = FstBlockC( jb + 1); jcol = ilsum_j[lb]; for (jj = 0; jj < SuperSize( gb ); jj++, jcol++) { dense_col = dense; j = index[next_ind+jj]; for (ii = j; ii < k; ii++) { uval[LUb_valptr[lb]++] = dense_col[jcol]; dense_col[jcol] = zero; dense_col += ldaspa_j; } } } } else { Ufstnz_br_ptr[ljb_i] = NULL; Unzval_br_ptr[ljb_i] = NULL; } /* if nrbu ... 
*/ } /* if myrow == jbrow */ /*------------------------------------------------ * SET UP L BLOCKS. *------------------------------------------------*/ if (mycol == jbcol) { /* Block column jb in my process column */ /* Scatter A_inf into SPA. */ for (j = ilsum_j[ljb_j], dense_col = dense; j < ilsum_j[ljb_j] + nsupc; j++) { for (i = ainf_colptr[j]; i < ainf_colptr[j+1]; i++) { irow = ainf_rowind[i]; if (irow >= n) printf ("Pe[%d] ERR1\n", iam); gb = BlockNum( irow ); if (gb >= nsupers) printf ("Pe[%d] ERR5\n", iam); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); if (irow >= ldaspa) printf ("Pe[%d] ERR0\n", iam); dense_col[irow] = ainf_val[i]; } } dense_col += ldaspa; } /* sort the indices of the diagonal block at the beginning of xlsub */ if (myrow == jbrow) { k = xlsub[ljb_j]; for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; if (irow < nsupc + fsupc && i != k+irow-fsupc) { lsub[i] = lsub[k + irow - fsupc]; lsub[k + irow - fsupc] = irow; i --; } } } /* Count number of blocks and length of each block. */ nrbl = 0; len = 0; /* Number of row subscripts I own. */ kseen = 0; for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; gb = BlockNum( irow ); /* Global block number */ pr = PROW( gb, grid ); /* Process row owning this block */ if ( pr != jbrow && fsendx_plist[ljb_j][pr] == EMPTY && myrow == jbrow) { fsendx_plist[ljb_j][pr] = YES; ++nfsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ if (Lrb_marker[lb] <= jb) { /* First see this block */ Lrb_marker[lb] = jb + 1; LUb_length[lb] = 1; LUb_number[nrbl++] = gb; if ( gb != jb ) /* Exclude diagonal block. */ ++fmod[lb]; /* Mod. count for forward solve */ if ( kseen == 0 && myrow != jbrow ) { ++nfrecvx; kseen = 1; } #if ( PRNTlevel>=1 ) ++nLblocks; #endif } else ++LUb_length[lb]; ++len; } } /* for i ... */ if ( nrbl ) { /* Do not ensure the blocks are sorted! 
*/ /* Set up the initial pointers for each block in index[] and nzval[]. */ /* If I am the owner of the diagonal block, order it first in LUb_number. Necessary for SuperLU_DIST routines */ kseen = EMPTY; for (j = 0; j < nrbl; j++) { if (LUb_number[j] == jb) kseen = j; } if (kseen != EMPTY && kseen != 0) { LUb_number[kseen] = LUb_number[0]; LUb_number[0] = jb; } /* Add room for descriptors */ len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1)) ) { fprintf (stderr, "Malloc fails for index[]"); return (memDist + memNLU); } Lrowind_bc_ptr[ljb_j] = index; if (!(Lnzval_bc_ptr[ljb_j] = doubleMalloc_dist(len*nsupc))) { fprintf(stderr, "Malloc fails for Lnzval_bc_ptr[*][] col block " IFMT, jb); return (memDist + memNLU); } memNLU += len1*iword + len*nsupc*dword; lusup = Lnzval_bc_ptr[ljb_j]; mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 ); mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc ); mybufmax[4] = SUPERLU_MAX( mybufmax[4], len ); index[0] = nrbl; /* Number of row blocks */ index[1] = len; /* LDA of the nzval[] */ next_ind = BC_HEADER; next_val = 0; for (k = 0; k < nrbl; ++k) { gb = LUb_number[k]; lb = LBi( gb, grid ); len = LUb_length[lb]; LUb_length[lb] = 0; index[next_ind++] = gb; /* Descriptor */ index[next_ind++] = len; LUb_indptr[lb] = next_ind; LUb_valptr[lb] = next_val; next_ind += len; next_val += len; } /* Propagate the compressed row subscripts to Lindex[], and the initial values of A from SPA into Lnzval[]. */ len = index[1]; /* LDA of lusup[] */ for (i = xlsub[ljb_j]; i < xlsub[ljb_j+1]; i++) { irow = lsub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); k = LUb_indptr[lb]++; /* Random access a block */ index[k] = irow; k = LUb_valptr[lb]++; irow = ilsum[lb] + irow - FstBlockC( gb ); for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } } /* for i ... 
*/ } else { Lrowind_bc_ptr[ljb_j] = NULL; Lnzval_bc_ptr[ljb_j] = NULL; } /* if nrbl ... */ } /* if mycol == pc */ } /* for jb ... */ SUPERLU_FREE(ilsum_j); SUPERLU_FREE(Urb_marker); SUPERLU_FREE(LUb_length); SUPERLU_FREE(LUb_indptr); SUPERLU_FREE(LUb_number); SUPERLU_FREE(LUb_valptr); SUPERLU_FREE(Lrb_marker); SUPERLU_FREE(dense); /* Free the memory used for storing L and U */ SUPERLU_FREE(xlsub); SUPERLU_FREE(xusub); if (lsub != NULL) SUPERLU_FREE(lsub); if (usub != NULL) SUPERLU_FREE(usub); /* Free the memory used for storing A */ SUPERLU_FREE(ainf_colptr); if (ainf_rowind != NULL) { SUPERLU_FREE(ainf_rowind); SUPERLU_FREE(ainf_val); } SUPERLU_FREE(asup_rowptr); if (asup_colind != NULL) { SUPERLU_FREE(asup_colind); SUPERLU_FREE(asup_val); } /* exchange information about bsendx_plist in between column of processors */ k = SUPERLU_MAX( grid->nprow, grid->npcol); if ( !(recvBuf = (int_t *) SUPERLU_MALLOC(nsupers*k*iword)) ) { fprintf (stderr, "Malloc fails for recvBuf[]."); return (memDist + memNLU); } if ( !(nnzToRecv = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for nnzToRecv[]."); return (memDist + memNLU); } if ( !(ptrToRecv = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for ptrToRecv[]."); return (memDist + memNLU); } if ( !(nnzToSend = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for nnzToRecv[]."); return (memDist + memNLU); } if ( !(ptrToSend = (int *) SUPERLU_MALLOC(nprocs*sizeof(int))) ) { fprintf (stderr, "Malloc fails for ptrToRecv[]."); return (memDist + memNLU); } if (memDist < (nsupers*k*iword +4*nprocs * sizeof(int))) memDist = nsupers*k*iword +4*nprocs * sizeof(int); for (p = 0; p < nprocs; p++) nnzToRecv[p] = 0; for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); nnzToRecv[p] += grid->npcol; } i = 0; for (p = 0; p < nprocs; p++) { ptrToRecv[p] = i; i += nnzToRecv[p]; ptrToSend[p] = 
0; if (p != iam) nnzToSend[p] = nnzToRecv[iam]; else nnzToSend[p] = 0; } nnzToRecv[iam] = 0; i = ptrToRecv[iam]; for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); if (p == iam) { ljb_j = LBj( jb, grid ); /* Local block number column wise */ for (j = 0; j < grid->npcol; j++, i++) recvBuf[i] = ToSendR[ljb_j][j]; } } MPI_Alltoallv (&(recvBuf[ptrToRecv[iam]]), nnzToSend, ptrToSend, mpi_int_t, recvBuf, nnzToRecv, ptrToRecv, mpi_int_t, grid->comm); for (jb = 0; jb < nsupers; jb++) { jbcol = PCOL( jb, grid ); jbrow = PROW( jb, grid ); p = PNUM(jbrow, jbcol, grid); ljb_j = LBj( jb, grid ); /* Local block number column wise */ ljb_i = LBi( jb, grid ); /* Local block number row wise */ /* (myrow == jbrow) { if (ToSendD[ljb_i] == YES) ToRecv[jb] = 1; } else { if (recvBuf[ptrToRecv[p] + mycol] == YES) ToRecv[jb] = 2; } */ if (recvBuf[ptrToRecv[p] + mycol] == YES) { if (myrow == jbrow) ToRecv[jb] = 1; else ToRecv[jb] = 2; } if (mycol == jbcol) { for (i = 0, j = ptrToRecv[p]; i < grid->npcol; i++, j++) ToSendR[ljb_j][i] = recvBuf[j]; ToSendR[ljb_j][mycol] = EMPTY; } ptrToRecv[p] += grid->npcol; } /* exchange information about bsendx_plist in between column of processors */ MPI_Allreduce ((*bsendx_plist), recvBuf, nsupers_j * grid->nprow, mpi_int_t, MPI_MAX, grid->cscp.comm); for (jb = 0; jb < nsupers; jb ++) { jbcol = PCOL( jb, grid); jbrow = PROW( jb, grid); if (mycol == jbcol) { ljb_j = LBj( jb, grid ); /* Local block number column wise */ if (myrow == jbrow ) { for (k = ljb_j * grid->nprow; k < (ljb_j+1) * grid->nprow; k++) { (*bsendx_plist)[k] = recvBuf[k]; if ((*bsendx_plist)[k] != EMPTY) nbsendx ++; } } else { for (k = ljb_j * grid->nprow; k < (ljb_j+1) * grid->nprow; k++) (*bsendx_plist)[k] = EMPTY; } } } SUPERLU_FREE(nnzToRecv); SUPERLU_FREE(ptrToRecv); SUPERLU_FREE(nnzToSend); SUPERLU_FREE(ptrToSend); SUPERLU_FREE(recvBuf); Llu->Lrowind_bc_ptr = Lrowind_bc_ptr; Llu->Lnzval_bc_ptr = Lnzval_bc_ptr; 
Llu->Ufstnz_br_ptr = Ufstnz_br_ptr; Llu->Unzval_br_ptr = Unzval_br_ptr; Llu->ToRecv = ToRecv; Llu->ToSendD = ToSendD; Llu->ToSendR = ToSendR; Llu->fmod = fmod; Llu->fsendx_plist = fsendx_plist; Llu->nfrecvx = nfrecvx; Llu->nfsendx = nfsendx; Llu->bmod = bmod; Llu->bsendx_plist = bsendx_plist; Llu->nbrecvx = nbrecvx; Llu->nbsendx = nbsendx; Llu->ilsum = ilsum; Llu->ldalsum = ldaspa; LUstruct->Glu_persist = Glu_persist; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. # L blocks " IFMT "\t# U blocks " IFMT "\n", nLblocks, nUblocks); #endif k = CEILING( nsupers, grid->nprow );/* Number of local block rows */ if ( !(Llu->mod_bit = intMalloc_dist(k)) ) ABORT("Malloc fails for mod_bit[]."); /* Find the maximum buffer size. */ MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, MPI_MAX, grid->comm); #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ilsum, fmod, fsendx_plist, bmod, bsendx_plist, ToRecv, ToSendR, ToSendD, mod_bit */ CHECK_MALLOC(iam, "Exit dist_psymbtonum()"); #endif return (- (memDist+memNLU)); } /* ddist_psymbtonum */ SuperLU_DIST_5.3.0/SRC/html_mainpage.h0000644013363400111340000000136713233431301016250 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! \mainpage SuperLU_DIST Documentation SuperLU_DIST is a parallel library for the direct solution of large, sparse, nonsymmetric systems of linear equations for distributed memory machines. The library is written in C and MPI, and is callable from either C or Fortran. The library routines perform an LU decomposition with static pivoting and triangular system solutions through forward and back substitution. */ SuperLU_DIST_5.3.0/SRC/dgsequ_dist.c0000644013363400111340000001317413233431301015750 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Computes row and column scalings */ /* * File name: dgsequ.c * History: Modified from LAPACK routine DGEEQU */ #include #include "superlu_ddefs.h" /*! \brief
    
    Purpose   
    =======   

    DGSEQU_dist computes row and column scalings intended to equilibrate an   
    M-by-N sparse matrix A and reduce its condition number. R returns the row
    scale factors and C the column scale factors, chosen to try to make   
    the largest element in each row and column of the matrix B with   
    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   

    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
    number and BIGNUM = largest safe number.  Use of these scaling   
    factors is not guaranteed to reduce the condition number of A but   
    works well in practice.   

    See supermatrix.h for the definition of 'SuperMatrix' structure.
 
    Arguments   
    =========   

    A       (input) SuperMatrix*
            The matrix of dimension (A->nrow, A->ncol) whose equilibration
            factors are to be computed. The type of A can be:
            Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
	    
    R       (output) double*, size A->nrow
            If INFO = 0 or INFO > M, R contains the row scale factors   
            for A.
	    
    C       (output) double*, size A->ncol
            If INFO = 0,  C contains the column scale factors for A.
	    
    ROWCND  (output) double*
            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
            AMAX is neither too large nor too small, it is not worth   
            scaling by R.
	    
    COLCND  (output) double*
            If INFO = 0, COLCND contains the ratio of the smallest   
            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
            worth scaling by C.
	    
    AMAX    (output) double*
            Absolute value of largest matrix element.  If AMAX is very   
            close to overflow or very close to underflow, the matrix   
            should be scaled.
	    
    INFO    (output) int_t*
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i,  and i is   
                  <= M:  the i-th row of A is exactly zero   
                  >  M:  the (i-M)-th column of A is exactly zero   

    ===================================================================== 
*/
void
dgsequ_dist(SuperMatrix *A, double *r, double *c, double *rowcnd,
            double *colcnd, double *amax, int_t *info)
{
    /* Local variables */
    NCformat *Astore;
    double   *Aval;
    int      iarg;          /* argument position handed to xerr_dist() */
    int_t    i, j, k, irow; /* int_t: colptr[]/rowind[] hold int_t values,
                               so a plain int could truncate them */
    double   rcmin, rcmax;
    double   bignum, smlnum;

    /* Test the input parameters. */
    *info = 0;
    if ( A->nrow < 0 || A->ncol < 0 ||
         A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
        *info = -1;
    if (*info != 0) {
        iarg = -(*info);
        xerr_dist("dgsequ_dist", &iarg);
        return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || A->ncol == 0 ) {
        *rowcnd = 1.;
        *colcnd = 1.;
        *amax = 0.;
        return;
    }

    Astore = (NCformat *) A->Store;
    Aval = (double *) Astore->nzval;

    /* Get machine constants. */
    smlnum = dmach_dist("S");
    bignum = 1. / smlnum;

    /* Compute row scale factors. */
    for (i = 0; i < A->nrow; ++i) r[i] = 0.;

    /* Find the maximum element in each row. */
    for (j = 0; j < A->ncol; ++j) {
        for (k = Astore->colptr[j]; k < Astore->colptr[j+1]; ++k) {
            irow = Astore->rowind[k];
            r[irow] = SUPERLU_MAX( r[irow], fabs(Aval[k]) );
        }
    }

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (i = 0; i < A->nrow; ++i) {
        rcmax = SUPERLU_MAX(rcmax, r[i]);
        rcmin = SUPERLU_MIN(rcmin, r[i]);
    }
    *amax = rcmax;

    if (rcmin == 0.) {
        /* Find the first zero scale factor and return an error code. */
        for (i = 0; i < A->nrow; ++i) {
            if (r[i] == 0.) {
                *info = i + 1;
                return;
            }
        }
    } else {
        /* Invert the scale factors, clamped to [smlnum, bignum]. */
        for (i = 0; i < A->nrow; ++i)
            r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
        /* Compute ROWCND = min(R(I)) / max(R(I)) */
        *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    /* Compute column scale factors */
    for (j = 0; j < A->ncol; ++j) c[j] = 0.;

    /* Find the maximum element in each column, assuming the row
       scalings computed above. */
    for (j = 0; j < A->ncol; ++j) {
        for (k = Astore->colptr[j]; k < Astore->colptr[j+1]; ++k) {
            irow = Astore->rowind[k];
            c[j] = SUPERLU_MAX( c[j], fabs(Aval[k]) * r[irow] );
        }
    }

    /* Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    for (j = 0; j < A->ncol; ++j) {
        rcmax = SUPERLU_MAX(rcmax, c[j]);
        rcmin = SUPERLU_MIN(rcmin, c[j]);
    }

    if (rcmin == 0.) {
        /* Find the first zero scale factor and return an error code. */
        for (j = 0; j < A->ncol; ++j) {
            if ( c[j] == 0. ) {
                *info = A->nrow + j + 1;
                return;
            }
        }
    } else {
        /* Invert the scale factors, clamped to [smlnum, bignum]. */
        for (j = 0; j < A->ncol; ++j)
            c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
        /* Compute COLCND = min(C(J)) / max(C(J)) */
        *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
    }

    return;
} /* dgsequ_dist */
SuperLU_DIST_5.3.0/SRC/pddistribute.c0000644013363400111340000010564513233431301016144 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Re-distribute A on the 2D process mesh. *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 * 
*/ #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *   Re-distribute A on the 2D process mesh.
 * 
 * Arguments
 * =========
 * 
 * A      (input) SuperMatrix*
 *	  The distributed input matrix A of dimension (A->nrow, A->ncol).
 *        A may be overwritten by diag(R)*A*diag(C)*Pc^T.
 *        The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
 *
 * ScalePermstruct (input) ScalePermstruct_t*
 *        The data structure to store the scaling and permutation vectors
 *        describing the transformations performed to the original matrix A.
 *
 * Glu_freeable (input) Glu_freeable_t*
 *        The global structure describing the graph of L and U.
 * 
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
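 * colptr (output) int_t**
 *        On exit, *colptr points to a newly allocated array of n+1 column
 *        pointers (n = A->ncol) for the nonzeros held by this process
 *        after redistribution, in compressed column storage.
 *
 * rowind (output) int_t**
 *        On exit, *rowind points to the row indices of those nonzeros,
 *        stored column by column. It is allocated only when this process
 *        holds at least one nonzero.
 *
 * a      (output) double**
 *        On exit, *a points to the numerical values matching *rowind.
 *
 * Return value
 * ============
 *   0 (always).
 * 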
*/ int_t dReDistribute_A(SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno, gridinfo_t *grid, int_t *colptr[], int_t *rowind[], double *a[]) { NRformat_loc *Astore; int_t *perm_r; /* row permutation vector */ int_t *perm_c; /* column permutation vector */ int_t i, irow, fst_row, j, jcol, k, gbi, gbj, n, m_loc, jsize; int_t nnz_loc; /* number of local nonzeros */ int_t SendCnt; /* number of remote nonzeros to be sent */ int_t RecvCnt; /* number of remote nonzeros to be sent */ int_t *nnzToSend, *nnzToRecv, maxnnzToRecv; int_t *ia, *ja, **ia_send, *index, *itemp; int_t *ptr_to_send; double *aij, **aij_send, *nzval, *dtemp; double *nzval_a; int iam, it, p, procs; MPI_Request *send_req; MPI_Status status; /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dReDistribute_A()"); #endif perm_r = ScalePermstruct->perm_r; perm_c = ScalePermstruct->perm_c; procs = grid->nprow * grid->npcol; Astore = (NRformat_loc *) A->Store; n = A->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; nnzToRecv = intCalloc_dist(2*procs); nnzToSend = nnzToRecv + procs; /* ------------------------------------------------------------ COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS, THEN ALLOCATE SPACE. THIS ACCOUNTS FOR THE FIRST PASS OF A. 
------------------------------------------------------------*/ for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { irow = perm_c[perm_r[i+fst_row]]; /* Row number in Pc*Pr*A */ jcol = Astore->colind[j]; gbi = BlockNum( irow ); gbj = BlockNum( jcol ); p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid ); ++nnzToSend[p]; } } /* All-to-all communication */ MPI_Alltoall( nnzToSend, 1, mpi_int_t, nnzToRecv, 1, mpi_int_t, grid->comm); maxnnzToRecv = 0; nnz_loc = SendCnt = RecvCnt = 0; for (p = 0; p < procs; ++p) { if ( p != iam ) { SendCnt += nnzToSend[p]; RecvCnt += nnzToRecv[p]; maxnnzToRecv = SUPERLU_MAX( nnzToRecv[p], maxnnzToRecv ); } else { nnz_loc += nnzToRecv[p]; /*assert(nnzToSend[p] == nnzToRecv[p]);*/ } } k = nnz_loc + RecvCnt; /* Total nonzeros ended up in my process. */ /* Allocate space for storing the triplets after redistribution. */ if ( k ) { /* count can be zero. */ if ( !(ia = intMalloc_dist(2*k)) ) ABORT("Malloc fails for ia[]."); if ( !(aij = doubleMalloc_dist(k)) ) ABORT("Malloc fails for aij[]."); } ja = ia + k; /* Allocate temporary storage for sending/receiving the A triplets. 
*/ if ( procs > 1 ) { if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); if ( !(ia_send = (int_t **) SUPERLU_MALLOC(procs*sizeof(int_t*))) ) ABORT("Malloc fails for ia_send[]."); if ( !(aij_send = (double **)SUPERLU_MALLOC(procs*sizeof(double*))) ) ABORT("Malloc fails for aij_send[]."); if ( SendCnt ) { /* count can be zero */ if ( !(index = intMalloc_dist(2*SendCnt)) ) ABORT("Malloc fails for index[]."); if ( !(nzval = doubleMalloc_dist(SendCnt)) ) ABORT("Malloc fails for nzval[]."); } if ( !(ptr_to_send = intCalloc_dist(procs)) ) ABORT("Malloc fails for ptr_to_send[]."); if ( maxnnzToRecv ) { /* count can be zero */ if ( !(itemp = intMalloc_dist(2*maxnnzToRecv)) ) ABORT("Malloc fails for itemp[]."); if ( !(dtemp = doubleMalloc_dist(maxnnzToRecv)) ) ABORT("Malloc fails for dtemp[]."); } for (i = 0, j = 0, p = 0; p < procs; ++p) { if ( p != iam ) { ia_send[p] = &index[i]; i += 2 * nnzToSend[p]; /* ia/ja indices alternate */ aij_send[p] = &nzval[j]; j += nnzToSend[p]; } } } /* if procs > 1 */ if ( !(*colptr = intCalloc_dist(n+1)) ) ABORT("Malloc fails for *colptr[]."); /* ------------------------------------------------------------ LOAD THE ENTRIES OF A INTO THE (IA,JA,AIJ) STRUCTURES TO SEND. THIS ACCOUNTS FOR THE SECOND PASS OF A. ------------------------------------------------------------*/ nnz_loc = 0; /* Reset the local nonzero count. 
*/ nzval_a = Astore->nzval; for (i = 0; i < m_loc; ++i) { for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) { irow = perm_c[perm_r[i+fst_row]]; /* Row number in Pc*Pr*A */ jcol = Astore->colind[j]; gbi = BlockNum( irow ); gbj = BlockNum( jcol ); p = PNUM( PROW(gbi,grid), PCOL(gbj,grid), grid ); if ( p != iam ) { /* remote */ k = ptr_to_send[p]; ia_send[p][k] = irow; ia_send[p][k + nnzToSend[p]] = jcol; aij_send[p][k] = nzval_a[j]; ++ptr_to_send[p]; } else { /* local */ ia[nnz_loc] = irow; ja[nnz_loc] = jcol; aij[nnz_loc] = nzval_a[j]; ++nnz_loc; ++(*colptr)[jcol]; /* Count nonzeros in each column */ } } } /* ------------------------------------------------------------ PERFORM REDISTRIBUTION. THIS INVOLVES ALL-TO-ALL COMMUNICATION. NOTE: Can possibly use MPI_Alltoallv. ------------------------------------------------------------*/ for (p = 0; p < procs; ++p) { if ( p != iam ) { it = 2*nnzToSend[p]; MPI_Isend( ia_send[p], it, mpi_int_t, p, iam, grid->comm, &send_req[p] ); it = nnzToSend[p]; MPI_Isend( aij_send[p], it, MPI_DOUBLE, p, iam+procs, grid->comm, &send_req[procs+p] ); } } for (p = 0; p < procs; ++p) { if ( p != iam ) { it = 2*nnzToRecv[p]; MPI_Recv( itemp, it, mpi_int_t, p, p, grid->comm, &status ); it = nnzToRecv[p]; MPI_Recv( dtemp, it, MPI_DOUBLE, p, p+procs, grid->comm, &status ); for (i = 0; i < nnzToRecv[p]; ++i) { ia[nnz_loc] = itemp[i]; jcol = itemp[i + nnzToRecv[p]]; /*assert(jcol 1 ) { SUPERLU_FREE(send_req); SUPERLU_FREE(ia_send); SUPERLU_FREE(aij_send); if ( SendCnt ) { SUPERLU_FREE(index); SUPERLU_FREE(nzval); } SUPERLU_FREE(ptr_to_send); if ( maxnnzToRecv ) { SUPERLU_FREE(itemp); SUPERLU_FREE(dtemp); } } /* ------------------------------------------------------------ CONVERT THE TRIPLET FORMAT INTO THE CCS FORMAT. 
------------------------------------------------------------*/ if ( nnz_loc ) { /* nnz_loc can be zero */ if ( !(*rowind = intMalloc_dist(nnz_loc)) ) ABORT("Malloc fails for *rowind[]."); if ( !(*a = doubleMalloc_dist(nnz_loc)) ) ABORT("Malloc fails for *a[]."); } /* Initialize the array of column pointers */ k = 0; jsize = (*colptr)[0]; (*colptr)[0] = 0; for (j = 1; j < n; ++j) { k += jsize; jsize = (*colptr)[j]; (*colptr)[j] = k; } /* Copy the triplets into the column oriented storage */ for (i = 0; i < nnz_loc; ++i) { j = ja[i]; k = (*colptr)[j]; (*rowind)[k] = ia[i]; (*a)[k] = aij[i]; ++(*colptr)[j]; } /* Reset the column pointers to the beginning of each column */ for (j = n; j > 0; --j) (*colptr)[j] = (*colptr)[j-1]; (*colptr)[0] = 0; if ( nnz_loc ) { SUPERLU_FREE(ia); SUPERLU_FREE(aij); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit dReDistribute_A()"); #endif return 0; } /* dReDistribute_A */ float pddistribute(fact_t fact, int_t n, SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, LUstruct_t *LUstruct, gridinfo_t *grid) /* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * March 15, 2003 * * * Purpose * ======= * Distribute the matrix onto the 2D process mesh. * * Arguments * ========= * * fact (input) fact_t * Specifies whether or not the L and U structures will be re-used. * = SamePattern_SameRowPerm: L and U structures are input, and * unchanged on exit. * = DOFACT or SamePattern: L and U structures are computed and output. * * n (input) int * Dimension of the matrix. * * A (input) SuperMatrix* * The distributed input matrix A of dimension (A->nrow, A->ncol). * A may be overwritten by diag(R)*A*diag(C)*Pc^T. The type of A can be: * Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. 
* * ScalePermstruct (input) ScalePermstruct_t* * The data structure to store the scaling and permutation vectors * describing the transformations performed to the original matrix A. * * Glu_freeable (input) *Glu_freeable_t * The global structure describing the graph of L and U. * * LUstruct (input) LUstruct_t* * Data structures for L and U factors. * * grid (input) gridinfo_t* * The 2D process mesh. * * Return value * ============ * > 0, working storage required (in bytes). * */ { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; int_t bnnz, fsupc, fsupc1, i, ii, irow, istart, j, jb, jj, k, len, len1, nsupc; int_t ljb; /* local block column number */ int_t nrbl; /* number of L blocks in current block column */ int_t nrbu; /* number of U blocks in current block column */ int_t gb; /* global block number; 0 < gb <= nsuper */ int_t lb; /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */ int iam, jbrow, kcol, mycol, myrow, pc, pr; int_t mybufmax[NBUFFERS]; NRformat_loc *Astore; double *a; int_t *asub, *xa; int_t *xsup = Glu_persist->xsup; /* supernode and column mapping */ int_t *supno = Glu_persist->supno; int_t *lsub, *xlsub, *usub, *xusub; int_t nsupers; int_t next_lind; /* next available position in index[*] */ int_t next_lval; /* next available position in nzval[*] */ int_t *index; /* indices consist of headers and row subscripts */ int *index1; /* temporary pointer to array of int */ double *lusup, *uval; /* nonzero values in L and U */ double **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc) */ int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */ double **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */ int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */ /*-- Counts to be used in factorization. --*/ int *ToRecv, *ToSendD, **ToSendR; /*-- Counts to be used in lower triangular solve. --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist; /* Column process list to send down Xk. 
*/ int_t nfrecvx = 0; /* Number of Xk I will receive. */ int_t nfsendx = 0; /* Number of Xk I will send */ int_t kseen; /*-- Counts to be used in upper triangular solve. --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist; /* Column process list to send down Xk. */ int_t nbrecvx = 0; /* Number of Xk I will receive. */ int_t nbsendx = 0; /* Number of Xk I will send */ int_t *ilsum; /* starting position of each supernode in the full array (local) */ /*-- Auxiliary arrays; freed on return --*/ int_t *rb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ int_t *Urb_length; /* U block length; size ceil(NSUPERS/Pr) */ int_t *Urb_indptr; /* pointers to U index[]; size ceil(NSUPERS/Pr) */ int_t *Urb_fstnz; /* # of fstnz in a block row; size ceil(NSUPERS/Pr) */ int_t *Ucbs; /* number of column blocks in a block row */ int_t *Lrb_length; /* L block length; size ceil(NSUPERS/Pr) */ int_t *Lrb_number; /* global block number; size ceil(NSUPERS/Pr) */ int_t *Lrb_indptr; /* pointers to L index[]; size ceil(NSUPERS/Pr) */ int_t *Lrb_valptr; /* pointers to L nzval[]; size ceil(NSUPERS/Pr) */ double *dense, *dense_col; /* SPA */ double zero = 0.0; int_t ldaspa; /* LDA of SPA */ int_t iword, dword; float mem_use = 0.0; #if ( PRNTlevel>=1 ) int_t nLblocks = 0, nUblocks = 0; #endif #if ( PROFlevel>=1 ) double t, t_u, t_l; int_t u_blks; #endif /* Initialization. */ iam = grid->iam; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0; nsupers = supno[n-1] + 1; Astore = (NRformat_loc *) A->Store; #if ( PRNTlevel>=1 ) iword = sizeof(int_t); dword = sizeof(double); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pddistribute()"); #endif #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif dReDistribute_A(A, ScalePermstruct, Glu_freeable, xsup, supno, grid, &xa, &asub, &a); #if ( PROFlevel>=1 ) t = SuperLU_timer_() - t; if ( !iam ) printf("--------\n" ".. 
Phase 1 - ReDistribute_A time: %.2f\t\n", t); #endif if ( fact == SamePattern_SameRowPerm ) { #if ( PROFlevel>=1 ) t_l = t_u = 0; u_blks = 0; #endif /* We can propagate the new values of A into the existing L and U data structures. */ ilsum = Llu->ilsum; ldaspa = Llu->ldalsum; if ( !(dense = doubleCalloc_dist(ldaspa * sp_ienv_dist(3))) ) ABORT("Calloc fails for SPA dense[]."); nrbu = CEILING( nsupers, grid->nprow ); /* No. of local block rows */ if ( !(Urb_length = intCalloc_dist(nrbu)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(nrbu)) ) ABORT("Malloc fails for Urb_indptr[]."); Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; #if ( PRNTlevel>=1 ) mem_use += 2.0*nrbu*iword + ldaspa*sp_ienv_dist(3)*dword; #endif #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /* Initialize Uval to zero. */ for (lb = 0; lb < nrbu; ++lb) { Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ index = Ufstnz_br_ptr[lb]; if ( index ) { uval = Unzval_br_ptr[lb]; len = index[1]; for (i = 0; i < len; ++i) uval[i] = zero; } /* if index != NULL */ } /* for lb ... */ for (jb = 0; jb < nsupers; ++jb) { /* Loop through each block column */ pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Scatter A into SPA (for L), or into U directly. 
*/ for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) { for (i = xa[j]; i < xa[j+1]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); if ( gb < jb ) { /* in U */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; while ( (k = index[Urb_indptr[lb]]) < jb ) { /* Skip nonzero values in this block */ Urb_length[lb] += index[Urb_indptr[lb]+1]; /* Move pointer to the next block */ Urb_indptr[lb] += UB_DESCRIPTOR + SuperSize( k ); } /*assert(k == jb);*/ /* start fstnz */ istart = Urb_indptr[lb] + UB_DESCRIPTOR; len = Urb_length[lb]; fsupc1 = FstBlockC( gb+1 ); k = j - fsupc; /* Sum the lengths of the leading columns */ for (jj = 0; jj < k; ++jj) len += fsupc1 - index[istart++]; /*assert(irow>=index[istart]);*/ uval[len + irow - index[istart]] = a[i]; } else { /* in L; put in SPA first */ irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } } /* for i ... */ dense_col += ldaspa; } /* for j ... */ #if ( PROFlevel>=1 ) t_u += SuperLU_timer_() - t; t = SuperLU_timer_(); #endif /* Gather the values of A from SPA into Lnzval[]. */ ljb = LBj( jb, grid ); /* Local block number */ index = Lrowind_bc_ptr[ljb]; if ( index ) { nrbl = index[0]; /* Number of row blocks. */ len = index[1]; /* LDA of lusup[]. */ lusup = Lnzval_bc_ptr[ljb]; next_lind = BC_HEADER; next_lval = 0; for (jj = 0; jj < nrbl; ++jj) { gb = index[next_lind++]; len1 = index[next_lind++]; /* Rows in the block. */ lb = LBi( gb, grid ); for (bnnz = 0; bnnz < len1; ++bnnz) { irow = index[next_lind++]; /* Global index. */ irow = ilsum[lb] + irow - FstBlockC( gb ); k = next_lval++; for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } /* for bnnz ... */ } /* for jj ... */ } /* if index ... */ #if ( PROFlevel>=1 ) t_l += SuperLU_timer_() - t; #endif } /* if mycol == pc */ } /* for jb ... 
*/ SUPERLU_FREE(dense); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); #if ( PROFlevel>=1 ) if ( !iam ) printf(".. 2nd distribute time: L %.2f\tU %.2f\tu_blks %d\tnrbu %d\n", t_l, t_u, u_blks, nrbu); #endif } else { /* ------------------------------------------------------------ FIRST TIME CREATING THE L AND U DATA STRUCTURES. ------------------------------------------------------------*/ #if ( PROFlevel>=1 ) t_l = t_u = 0; u_blks = 0; #endif /* We first need to set up the L and U data structures and then * propagate the values of A into them. */ lsub = Glu_freeable->lsub; /* compressed L subscripts */ xlsub = Glu_freeable->xlsub; usub = Glu_freeable->usub; /* compressed U subscripts */ xusub = Glu_freeable->xusub; if ( !(ToRecv = (int *) SUPERLU_MALLOC(nsupers * sizeof(int))) ) ABORT("Malloc fails for ToRecv[]."); for (i = 0; i < nsupers; ++i) ToRecv[i] = 0; k = CEILING( nsupers, grid->npcol );/* Number of local column blocks */ if ( !(ToSendR = (int **) SUPERLU_MALLOC(k*sizeof(int*))) ) ABORT("Malloc fails for ToSendR[]."); j = k * grid->npcol; if ( !(index1 = SUPERLU_MALLOC(j * sizeof(int))) ) ABORT("Malloc fails for index[]."); #if ( PRNTlevel>=1 ) mem_use += (float) k*sizeof(int_t*) + (j + nsupers)*iword; #endif for (i = 0; i < j; ++i) index1[i] = EMPTY; for (i = 0,j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index1[j]; k = CEILING( nsupers, grid->nprow ); /* Number of local block rows */ /* Pointers to the beginning of each block row of U. */ if ( !(Unzval_br_ptr = (double**)SUPERLU_MALLOC(k * sizeof(double*))) ) ABORT("Malloc fails for Unzval_br_ptr[]."); if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Ufstnz_br_ptr[]."); if ( !(ToSendD = SUPERLU_MALLOC(k * sizeof(int))) ) ABORT("Malloc fails for ToSendD[]."); for (i = 0; i < k; ++i) ToSendD[i] = NO; if ( !(ilsum = intMalloc_dist(k+1)) ) ABORT("Malloc fails for ilsum[]."); /* Auxiliary arrays used to set up U block data structures. 
They are freed on return. */ if ( !(rb_marker = intCalloc_dist(k)) ) ABORT("Calloc fails for rb_marker[]."); if ( !(Urb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Urb_indptr[]."); if ( !(Urb_fstnz = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_fstnz[]."); if ( !(Ucbs = intCalloc_dist(k)) ) ABORT("Calloc fails for Ucbs[]."); #if ( PRNTlevel>=1 ) mem_use += 2.0*k*sizeof(int_t*) + (7*k+1)*iword; #endif /* Compute ldaspa and ilsum[]. */ ldaspa = 0; ilsum[0] = 0; for (gb = 0; gb < nsupers; ++gb) { if ( myrow == PROW( gb, grid ) ) { i = SuperSize( gb ); ldaspa += i; lb = LBi( gb, grid ); ilsum[lb + 1] = ilsum[lb] + i; } } #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif /* ------------------------------------------------------------ COUNT NUMBER OF ROW BLOCKS AND THE LENGTH OF EACH BLOCK IN U. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF G(U). ------------------------------------------------------------*/ /* Loop through each supernode column. */ for (jb = 0; jb < nsupers; ++jb) { pc = PCOL( jb, grid ); fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Loop through each column in the block. */ for (j = fsupc; j < fsupc + nsupc; ++j) { /* usub[*] contains only "first nonzero" in each segment. */ for (i = xusub[j]; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero of the segment. */ gb = BlockNum( irow ); kcol = PCOL( gb, grid ); ljb = LBj( gb, grid ); if ( mycol == kcol && mycol != pc ) ToSendR[ljb][pc] = YES; pr = PROW( gb, grid ); lb = LBi( gb, grid ); if ( mycol == pc ) { if ( myrow == pr ) { ToSendD[lb] = YES; /* Count nonzeros in entire block row. */ Urb_length[lb] += FstBlockC( gb+1 ) - irow; if (rb_marker[lb] <= jb) {/* First see the block */ rb_marker[lb] = jb + 1; Urb_fstnz[lb] += nsupc; ++Ucbs[lb]; /* Number of column blocks in block row lb. */ #if ( PRNTlevel>=1 ) ++nUblocks; #endif } ToRecv[gb] = 1; } else ToRecv[gb] = 2; /* Do I need 0, 1, 2 ? 
*/ } } /* for i ... */ } /* for j ... */ } /* for jb ... */ /* Set up the initial pointers for each block row in U. */ nrbu = CEILING( nsupers, grid->nprow );/* Number of local block rows */ for (lb = 0; lb < nrbu; ++lb) { len = Urb_length[lb]; rb_marker[lb] = 0; /* Reset block marker. */ if ( len ) { /* Add room for descriptors */ len1 = Urb_fstnz[lb] + BR_HEADER + Ucbs[lb] * UB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1+1)) ) ABORT("Malloc fails for Uindex[]."); Ufstnz_br_ptr[lb] = index; if ( !(Unzval_br_ptr[lb] = doubleMalloc_dist(len)) ) ABORT("Malloc fails for Unzval_br_ptr[*][]."); mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 ); mybufmax[3] = SUPERLU_MAX( mybufmax[3], len ); index[0] = Ucbs[lb]; /* Number of column blocks */ index[1] = len; /* Total length of nzval[] */ index[2] = len1; /* Total length of index[] */ index[len1] = -1; /* End marker */ } else { Ufstnz_br_ptr[lb] = NULL; Unzval_br_ptr[lb] = NULL; } Urb_length[lb] = 0; /* Reset block length. */ Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ Urb_fstnz[lb] = BR_HEADER; } /* for lb ... */ SUPERLU_FREE(Ucbs); #if ( PROFlevel>=1 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Phase 2 - setup U strut time: %.2f\t\n", t); #endif #if ( PRNTlevel>=1 ) mem_use -= 2.0*k * iword; #endif /* Auxiliary arrays used to set up L block data structures. They are freed on return. k is the number of local row blocks. */ if ( !(Lrb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Lrb_length[]."); if ( !(Lrb_number = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_number[]."); if ( !(Lrb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_indptr[]."); if ( !(Lrb_valptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_valptr[]."); if ( !(dense = doubleCalloc_dist(ldaspa * sp_ienv_dist(3))) ) ABORT("Calloc fails for SPA dense[]."); /* These counts will be used for triangular solves. 
*/ if ( !(fmod = intCalloc_dist(k)) ) ABORT("Calloc fails for fmod[]."); if ( !(bmod = intCalloc_dist(k)) ) ABORT("Calloc fails for bmod[]."); /* ------------------------------------------------ */ #if ( PRNTlevel>=1 ) mem_use += 6.0*k*iword + ldaspa*sp_ienv_dist(3)*dword; #endif k = CEILING( nsupers, grid->npcol );/* Number of local block columns */ /* Pointers to the beginning of each block column of L. */ if ( !(Lnzval_bc_ptr = (double**)SUPERLU_MALLOC(k * sizeof(double*))) ) ABORT("Malloc fails for Lnzval_bc_ptr[]."); if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Lrowind_bc_ptr[]."); Lrowind_bc_ptr[k-1] = NULL; /* These lists of processes will be used for triangular solves. */ if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for fsendx_plist[]."); len = k * grid->nprow; if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for fsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) fsendx_plist[i] = &index[j]; if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for bsendx_plist[]."); if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for bsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) bsendx_plist[i] = &index[j]; /* -------------------------------------------------------------- */ #if ( PRNTlevel>=1 ) mem_use += 4.0*k*sizeof(int_t*) + 2.0*len*iword; #endif /*------------------------------------------------------------ PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U. ------------------------------------------------------------*/ for (jb = 0; jb < nsupers; ++jb) { /* for each block column ... 
*/ pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); ljb = LBj( jb, grid ); /* Local block number */ /* Scatter A into SPA. */ for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) { for (i = xa[j]; i < xa[j+1]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } dense_col += ldaspa; } /* for j ... */ jbrow = PROW( jb, grid ); /*------------------------------------------------ * SET UP U BLOCKS. *------------------------------------------------*/ #if ( PROFlevel>=1 ) t = SuperLU_timer_(); #endif kseen = 0; dense_col = dense; /* Loop through each column in the block column. */ for (j = fsupc; j < FstBlockC( jb+1 ); ++j) { istart = xusub[j]; /* NOTE: Only the first nonzero index of the segment is stored in usub[]. */ for (i = istart; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero in the segment. */ gb = BlockNum( irow ); pr = PROW( gb, grid ); if ( pr != jbrow && myrow == jbrow && /* diag. proc. owning jb */ bsendx_plist[ljb][pr] == EMPTY ) { bsendx_plist[ljb][pr] = YES; ++nbsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; fsupc1 = FstBlockC( gb+1 ); if (rb_marker[lb] <= jb) { /* First time see the block */ rb_marker[lb] = jb + 1; Urb_indptr[lb] = Urb_fstnz[lb];; index[Urb_indptr[lb]] = jb; /* Descriptor */ Urb_indptr[lb] += UB_DESCRIPTOR; /* Record the first location in index[] of the next block */ Urb_fstnz[lb] = Urb_indptr[lb] + nsupc; len = Urb_indptr[lb];/* Start fstnz in index */ index[len-1] = 0; for (k = 0; k < nsupc; ++k) index[len+k] = fsupc1; if ( gb != jb )/* Exclude diagonal block. */ ++bmod[lb];/* Mod. 
count for back solve */ if ( kseen == 0 && myrow != jbrow ) { ++nbrecvx; kseen = 1; } } else { /* Already saw the block */ len = Urb_indptr[lb];/* Start fstnz in index */ } jj = j - fsupc; index[len+jj] = irow; /* Load the numerical values */ k = fsupc1 - irow; /* No. of nonzeros in segment */ index[len-1] += k; /* Increment block length in Descriptor */ irow = ilsum[lb] + irow - FstBlockC( gb ); for (ii = 0; ii < k; ++ii) { uval[Urb_length[lb]++] = dense_col[irow + ii]; dense_col[irow + ii] = zero; } } /* if myrow == pr ... */ } /* for i ... */ dense_col += ldaspa; } /* for j ... */ #if ( PROFlevel>=1 ) t_u += SuperLU_timer_() - t; t = SuperLU_timer_(); #endif /*------------------------------------------------ * SET UP L BLOCKS. *------------------------------------------------*/ /* Count number of blocks and length of each block. */ nrbl = 0; len = 0; /* Number of row subscripts I own. */ kseen = 0; istart = xlsub[fsupc]; for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); /* Global block number */ pr = PROW( gb, grid ); /* Process row owning this block */ if ( pr != jbrow && myrow == jbrow && /* diag. proc. owning jb */ fsendx_plist[ljb][pr] == EMPTY /* first time */ ) { fsendx_plist[ljb][pr] = YES; ++nfsendx; } if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ if (rb_marker[lb] <= jb) { /* First see this block */ rb_marker[lb] = jb + 1; Lrb_length[lb] = 1; Lrb_number[nrbl++] = gb; if ( gb != jb ) /* Exclude diagonal block. */ ++fmod[lb]; /* Mod. count for forward solve */ if ( kseen == 0 && myrow != jbrow ) { ++nfrecvx; kseen = 1; } #if ( PRNTlevel>=1 ) ++nLblocks; #endif } else { ++Lrb_length[lb]; } ++len; } } /* for i ... */ if ( nrbl ) { /* Do not ensure the blocks are sorted! */ /* Set up the initial pointers for each block in index[] and nzval[]. 
*/ /* Add room for descriptors */ len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1)) ) ABORT("Malloc fails for index[]"); Lrowind_bc_ptr[ljb] = index; if (!(Lnzval_bc_ptr[ljb] = doubleMalloc_dist(len*nsupc))) { fprintf(stderr, "col block " IFMT " ", jb); ABORT("Malloc fails for Lnzval_bc_ptr[*][]"); } mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 ); mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc ); mybufmax[4] = SUPERLU_MAX( mybufmax[4], len ); index[0] = nrbl; /* Number of row blocks */ index[1] = len; /* LDA of the nzval[] */ next_lind = BC_HEADER; next_lval = 0; for (k = 0; k < nrbl; ++k) { gb = Lrb_number[k]; lb = LBi( gb, grid ); len = Lrb_length[lb]; Lrb_length[lb] = 0; /* Reset vector of block length */ index[next_lind++] = gb; /* Descriptor */ index[next_lind++] = len; Lrb_indptr[lb] = next_lind; Lrb_valptr[lb] = next_lval; next_lind += len; next_lval += len; } /* Propagate the compressed row subscripts to Lindex[], and the initial values of A from SPA into Lnzval[]. */ lusup = Lnzval_bc_ptr[ljb]; len = index[1]; /* LDA of lusup[] */ for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); k = Lrb_indptr[lb]++; /* Random access a block */ index[k] = irow; k = Lrb_valptr[lb]++; irow = ilsum[lb] + irow - FstBlockC( gb ); for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } } /* for i ... */ } else { Lrowind_bc_ptr[ljb] = NULL; Lnzval_bc_ptr[ljb] = NULL; } /* if nrbl ... */ #if ( PROFlevel>=1 ) t_l += SuperLU_timer_() - t; #endif } /* if mycol == pc */ } /* for jb ... 
*/ Llu->Lrowind_bc_ptr = Lrowind_bc_ptr; Llu->Lnzval_bc_ptr = Lnzval_bc_ptr; Llu->Ufstnz_br_ptr = Ufstnz_br_ptr; Llu->Unzval_br_ptr = Unzval_br_ptr; Llu->ToRecv = ToRecv; Llu->ToSendD = ToSendD; Llu->ToSendR = ToSendR; Llu->fmod = fmod; Llu->fsendx_plist = fsendx_plist; Llu->nfrecvx = nfrecvx; Llu->nfsendx = nfsendx; Llu->bmod = bmod; Llu->bsendx_plist = bsendx_plist; Llu->nbrecvx = nbrecvx; Llu->nbsendx = nbsendx; Llu->ilsum = ilsum; Llu->ldalsum = ldaspa; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. # L blocks " IFMT "\t# U blocks " IFMT "\n", nLblocks, nUblocks); #endif SUPERLU_FREE(rb_marker); SUPERLU_FREE(Urb_fstnz); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); SUPERLU_FREE(Lrb_length); SUPERLU_FREE(Lrb_number); SUPERLU_FREE(Lrb_indptr); SUPERLU_FREE(Lrb_valptr); SUPERLU_FREE(dense); /* Find the maximum buffer size. */ MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, MPI_MAX, grid->comm); k = CEILING( nsupers, grid->nprow );/* Number of local block rows */ if ( !(Llu->mod_bit = intMalloc_dist(k)) ) ABORT("Malloc fails for mod_bit[]."); #if ( PROFlevel>=1 ) if ( !iam ) printf(".. 1st distribute time:\n " "\tL\t%.2f\n\tU\t%.2f\n" "\tu_blks %d\tnrbu %d\n--------\n", t_l, t_u, u_blks, nrbu); #endif } /* else fact != SamePattern_SameRowPerm */ if ( xa[A->ncol] > 0 ) { /* may not have any entries on this process. */ SUPERLU_FREE(asub); SUPERLU_FREE(a); } SUPERLU_FREE(xa); #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ilsum, fmod, fsendx_plist, bmod, bsendx_plist */ CHECK_MALLOC(iam, "Exit pddistribute()"); #endif return (mem_use); } /* PDDISTRIBUTE */ SuperLU_DIST_5.3.0/SRC/dSchCompUdt-cuda.c0000644013363400111340000004604413233431301016526 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief This file contains the main loop of pdgstrf which involves * rank k update of the Schur complement. * Uses CUDA GPU. * *
 * -- Distributed SuperLU routine (version 4.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 */

#define SCHEDULE_STRATEGY dynamic

/*
 * Evaluate a cuBLAS call exactly once and terminate the process with
 * exit(1) if it did not return CUBLAS_STATUS_SUCCESS.  The numeric status
 * and the call site (__FILE__/__LINE__) are written to stderr first.
 *
 * The expansion is a single do { ... } while (0) statement with NO trailing
 * semicolon: the caller's own ';' completes it, so
 *     if (c) cublasCheckErrors(call); else ...
 * parses as intended.
 */
#define cublasCheckErrors(fn) \
    do { \
        cublasStatus_t cublas_status_ = (fn); \
        if (cublas_status_ != CUBLAS_STATUS_SUCCESS) { \
            fprintf(stderr, "Fatal cublas error: %d (at %s:%d)\n", \
                (int)(cublas_status_), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while(0)


if ( msg0 && msg2 ) {  /* L(:,k) and U(k,:) are not empty. */
    ldu   =0;
    full  =1;
    int cum_nrow;
    int temp_nbrow;

    lptr = lptr0;
    luptr = luptr0;
    
    nbrow= lsub[1];
    if (myrow==krow) nbrow = lsub[1]-lsub[3];

    if (nbrow>0) {
        
        int ncol_max = SUPERLU_MIN(buffer_size/nbrow,bigu_size/ldt);
        int num_streams_used,        /*number of streams that will be used*/
        ncpu_blks;                     /*Number of CPU dgemm blks*/

        int jjj, jjj_st,jjj_global;        
        /*
         * Pre-pass over U blocks jj0 .. nub-1: for each block j record
         *   full_u_cols[j] = number of columns whose segment length
         *                    (klst - usub[jj]) is non-zero,
         *   blk_ldu[j]     = the largest segment length in the block.
         */
        for (j = jj0; j < nub; ++j) {
            int temp_ldu = 0;   /* running maximum of segsize for block j */

            arrive_at_ublock( j, &iukp, &rukp, &jb, &ljb, &nsupc,
                              iukp0, rukp0, usub, perm_u, xsup, grid );

            ncols = 0;
            for (jj = iukp; jj < iukp + nsupc; ++jj) {
                segsize = klst - usub[jj];
                if ( segsize != 0 ) ++ncols;
                if ( segsize > temp_ldu ) temp_ldu = segsize;
            }

            blk_ldu[j] = temp_ldu;
            full_u_cols[j] = ncols;
        } /* end for j = jj0..nub */

        jjj = jj0; /* initialization */
            
        // #pragma omp barrier 
        while ( jjj < nub ) {
            jjj_st=jjj;
/*
 * Choose the next batch of U blocks [jjj_st, jjj_global) and split it
 * between the CPU and the GPU streams.
 *
 * While scanning, full_u_cols[j] is turned in place into a running column
 * count that starts at jjj_st, and ldu becomes the largest blk_ldu[] of the
 * blocks scanned so far.  The scan stops at the first block j for which the
 * running count plus the (not yet accumulated) count of block j+1 would
 * exceed ncol_max; block j itself still belongs to the batch.
 */
#ifdef _OPENMP
#pragma omp single
#endif
            {
                ldu = blk_ldu[jjj_st];
                for (j = jjj_st; j < nub ; ++j) {
                    
                    /* prefix sum (restarted at the first block of the batch) */
                    if (j != jjj_st) full_u_cols[j] += full_u_cols[j-1];

                    ldu = SUPERLU_MAX(ldu, blk_ldu[j]);   

                    /* break condition */
                    /* the number of columns that can be processed is limited by buffer size*/
                    /* full_u_cols[j+1] is still the per-block count here; the last
                       block (j+1==nub) has no successor, so 0 is added instead. */
                    if (full_u_cols[j]+((j+1==nub)?0:full_u_cols[j+1]) > ncol_max) {
                        break;
                    }
                } /* end for j=jjj_st to nub */  

                /* j is the last block of the batch after a break, or nub if the
                   loop ran to completion; either way the result is capped at nub. */
                jjj_global = SUPERLU_MIN(nub, j+1); /* Maximum value of jjj will be nub */
                
                // TAU_STATIC_TIMER_START("work_divison");
                /* Divide CPU-GPU gemm here */
                gemm_division_cpu_gpu(
		       &num_streams_used, /*number of streams that will be used*/
		       stream_end_col,    /*array holding last column blk for each partition*/
		       &ncpu_blks,        /*Number of CPU gemm blks*/
		       			  /*input*/
		       nbrow,             /*number of row in A matrix*/
		       ldu,               /*number of k in dgemm*/
		       nstreams,
		       full_u_cols + jjj_st, /*array containing prefix sum of work load*/
		       jjj_global-jjj_st     /*Number of work load */
                );
                // TAU_STATIC_TIMER_STOP("work_divison");

            } /* pragma omp single */

            /* Advance to the end of the batch that was just selected. */
            jjj = jjj_global;

            /* A batch made of one single block that alone has more columns
               than ncol_max: report it, and print the sizes when the product
               with nbrow exceeds buffer_size. */
            if ( full_u_cols[jjj_st] > ncol_max && jjj == jjj_st + 1 ) {
                printf("allocate more memory for buffer !!!!\n");
                if ( nbrow * full_u_cols[jjj_st] > buffer_size ) {
                    printf("%d buffer_size %d\n",
                           nbrow * full_u_cols[jjj_st], buffer_size);
                }
            }
            
            // #pragma omp barrier 
            /* gathering circuit */
            assert(jjj_st 0 ) {
#ifdef PI_DEBUG
		printf("nbrow %d *ldu %d  =%d < ldt %d * max_row_size %d =%d \n",nbrow,ldu,nbrow*ldu,ldt,max_row_size,ldt*max_row_size );
		assert(nbrow*ldu<=ldt*max_row_size);
#endif 
		cudaMemcpy2DAsync(dA, nbrow*sizeof(double),
				  &lusup[luptr+(knsupc-ldu)*nsupr],
				  nsupr*sizeof(double), nbrow*sizeof(double),
				  ldu, cudaMemcpyHostToDevice, streams[0]);
	    }
                
	    /*
	     * Stream part of the batch: one GEMM per active stream.
	     * With GPU_ACC each stream copies its slice of bigU to the device,
	     * runs cublasDgemm on it and copies the result slice back into
	     * bigV, all asynchronously on streams[i].  Without GPU_ACC the same
	     * slice is multiplied on the host with my_dgemm_.
	     */
	    for (int i = 0; i < num_streams_used; ++i) {
		/* First U block of stream i: stream 0 starts right after the
		   ncpu_blks blocks kept for the CPU, stream i>0 starts where
		   stream i-1 ended. */
		int st = (i==0) ? ncpu_blks+jjj_st : jjj_st+stream_end_col[i-1];
		/* Running column count in front of block st.
		   NOTE(review): this reads full_u_cols[st-1]; if ncpu_blks can
		   be 0 for stream 0 that is the entry before the batch --
		   confirm against gemm_division_cpu_gpu(). */
		int st_col = full_u_cols[st-1];
		int num_col_stream = full_u_cols[jjj_st+stream_end_col[i]-1]-full_u_cols[st-1];
		tempu = bigU;

		/* Destination of this stream's result inside bigV. */
		double *tempv1 = bigV + full_u_cols[st-1]*nbrow;

		/* Following is for testing purpose */
#ifdef GPU_ACC
		int stream_id = i;
		int b_offset  = ldu * st_col;
		int c_offset  = st_col * nbrow;
		size_t B_stream_size = ldu * num_col_stream * sizeof(double);
		size_t C_stream_size = nbrow * num_col_stream * sizeof(double);

		assert(ldu*(st_col+num_col_stream) < bigu_size);
		assert(nbrow*(st_col+num_col_stream) < buffer_size);

		/* Host -> device copy of the U slice.  Checked through
		   checkCuda like the device -> host copy below, so a failed
		   enqueue is not silently ignored. */
		checkCuda( cudaMemcpyAsync(dB+b_offset, tempu+b_offset,
					   B_stream_size,
					   cudaMemcpyHostToDevice,
					   streams[stream_id]) );

		cublasCheckErrors(
				  cublasSetStream(handle[stream_id],
						  streams[stream_id])
				  );

		cublasCheckErrors(
				  cublasDgemm(handle[stream_id],
					      CUBLAS_OP_N, CUBLAS_OP_N,
					      nbrow, num_col_stream, ldu,
					      &alpha, dA, nbrow,
					      &dB[b_offset], ldu,
					      &beta, &dC[c_offset],
					      nbrow)
				  );

		/* Device -> host copy of the result slice. */
		checkCuda( cudaMemcpyAsync(tempv1, dC+c_offset,
					   C_stream_size,
					   cudaMemcpyDeviceToHost,
					   streams[stream_id]) );
#else
		if ( num_col_stream > 0 ) {
		    my_dgemm_("N", "N", &nbrow, &num_col_stream, &ldu,
			      &alpha, &lusup[luptr+(knsupc-ldu)*nsupr],
			      &nsupr, tempu+ldu*st_col, &ldu, &beta,
			      tempv1, &nbrow, 1, 1);
		}

#endif

	    } /* end for i = 1 to num_streams used */
	    
	    /*
	     * CPU part of the batch: multiply the L panel by the first
	     * ncpu_blks U blocks of the batch (num_col columns in total) and
	     * store the product at the start of bigV.
	     */
	    int num_col = full_u_cols[jjj_st+ncpu_blks-1];
	    int st_col = 0;        /* special case for cpu: starts at column 0 */
	    tempv = bigV + nbrow * st_col;
	    tempu = bigU;

	    double tstart = SuperLU_timer_();
#if defined (USE_VENDOR_BLAS)
	    dgemm_("N", "N", &nbrow, &num_col, &ldu, &alpha,
		  &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr,
		  tempu+ldu*st_col, &ldu, &beta, tempv, &nbrow, 1, 1);
#else
	    dgemm_("N", "N", &nbrow, &num_col, &ldu, &alpha,
		  &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr,
		  tempu+ldu*st_col, &ldu, &beta, tempv, &nbrow);
#endif
	    gemm_timer += SuperLU_timer_() -tstart;

	    /* Flop count of the whole batch.  The product is formed in double
	       (2.0 * ...) because the int product 2*nbrow*ldu*ncols can exceed
	       INT_MAX, which is undefined behavior for signed int. */
	    stat->ops[FACT] += 2.0 * nbrow * ldu * full_u_cols[jjj-1];

	    // printf("after dgemm \n");

            /* Now scattering blocks handled by cpu */
            int temp_ncol;

            /* scatter first the blocks which the cpu has computed */
            tstart = SuperLU_timer_();

/*
 * Scatter the CPU-computed part of bigV, i.e. U blocks
 * jjj_st .. jjj_st+ncpu_blks-1, into the destination blocks through
 * dscatter_u / dscatter_l.
 *
 * Two work-sharing layouts are used:
 *   - ncpu_blks < number of threads: every thread walks all block columns j
 *     and the row blocks lb of L(:,k) are shared out with "omp for";
 *   - otherwise: the block columns j are shared out with "omp for" and each
 *     thread walks all row blocks lb of its columns.
 * Each thread uses its own ldt-sized slice of indirect / indirect2.
 */
#ifdef _OPENMP
#pragma omp parallel  \
    private(j,iukp,rukp, tempu, tempv, cum_nrow, jb, nsupc,ljb,	\
	    segsize,lead_zero,					\
	    ib, temp_nbrow,ilst,lib,index,			\
	    ijb,fnz,ucol,rel,ldv,lptrj,luptrj,			\
	    nzval,     lb ,                     jj, i)		\
    firstprivate(luptr,lptr) default (shared)
#endif
            {
                int thread_id = omp_get_thread_num();
        
                /* per-thread scratch index arrays */
                int* indirect_thread = indirect + ldt*thread_id;
                int* indirect2_thread = indirect2 + ldt*thread_id;
                double* tempv1;
                
                if (ncpu_blks< omp_get_num_threads()) {
                    // TAU_STATIC_TIMER_START("SPECIAL_CPU_SCATTER");
                    
                    for (j = jjj_st; j < jjj_st+ncpu_blks; ++j) {
                        /* code */
                        #ifdef PI_DEBUG
                            printf("scattering %d  block column\n",j);
                        #endif

                        /* == processing each of the remaining columns == */

                        /* start of block column j inside bigV; the running
                           column count restarts at jjj_st */
                        if(j==jjj_st) tempv1 = bigV;
                        else tempv1 = bigV + full_u_cols[j-1]*nbrow;

                        arrive_at_ublock( j,&iukp,&rukp,&jb,&ljb,&nsupc,
					  iukp0,rukp0,usub,perm_u,xsup,grid );

                        cum_nrow =0 ;

                        /* do update with the kth column of L and (k,j)th block of U */
                        lptr = lptr0;
                        luptr = luptr0;

#ifdef _OPENMP
#pragma omp for schedule( SCHEDULE_STRATEGY ) nowait
#endif
                        for (lb = 0; lb < nlb; lb++ ) {
                            /* These two declarations shadow the private
                               cum_nrow / temp_nbrow of the parallel region. */
                            int cum_nrow = 0;
                            int temp_nbrow;
                            lptr = lptr0;
                            luptr = luptr0;
                            /* Iterations are handed out in no fixed order, so
                               the position of row block lb is recomputed by
                               walking the lb preceding blocks from lptr0. */
                            for (int i = 0; i < lb; ++i) {
                                ib = lsub[lptr];        /* Row block L(i,k). */
                                temp_nbrow = lsub[lptr+1];   /* Number of full rows. */
                                lptr += LB_DESCRIPTOR;  /* Skip descriptor. */
                                lptr += temp_nbrow;
                                luptr += temp_nbrow;
                                cum_nrow +=temp_nbrow;
                            }

                            ib = lsub[lptr];       /* Row block L(i,k). */
                            temp_nbrow = lsub[lptr+1];  /* Number of full rows. */
                            assert(temp_nbrow<=nbrow);

                            lptr += LB_DESCRIPTOR; /* Skip descriptor. */

                            /* Now gather the result into the destination block. */
                            if ( ib < jb ) {  /* A(i,j) is in U. */
                                #ifdef PI_DEBUG
                                    printf("cpu scatter \n");
                                    printf("A(%d,%d) goes to U block %d \n", ib,jb,ljb);
                                #endif

                                tempv = tempv1+cum_nrow;
                                dscatter_u (
						 ib,jb,
						 nsupc,iukp,xsup,
						 klst,nbrow,
						 lptr,temp_nbrow,lsub,
						 usub,tempv,
						 Ufstnz_br_ptr,
						 Unzval_br_ptr,
						 grid
						 );
                            } else {    /* A(i,j) is in L. */
#ifdef PI_DEBUG
                                printf("cpu scatter \n");
                                printf("A(%d,%d) goes to L block %d \n", ib,jb,ljb);
#endif
                                
                                tempv = tempv1+cum_nrow;

                                dscatter_l (
						 ib, ljb,nsupc,iukp,xsup,klst,nbrow,lptr,
						 temp_nbrow,usub,lsub,tempv,
						 indirect_thread,indirect2_thread,
						 Lrowind_bc_ptr,Lnzval_bc_ptr,grid
						 );
                            } /* if ib < jb ... */

                            lptr += temp_nbrow;
                            luptr += temp_nbrow;
                            cum_nrow += temp_nbrow;

                        } /* for lb ... */

                        luptr=luptr0;
                    } /* for j = jjj_st ... */

                    // TAU_STATIC_TIMER_STOP("SPECIAL_CPU_SCATTER");
                } else {
#ifdef _OPENMP
#pragma omp for schedule(SCHEDULE_STRATEGY) nowait
#endif
                    for (j = jjj_st; j < jjj_st+ncpu_blks; ++j) {
                        /* code */
                        #ifdef PI_DEBUG
                            printf("scattering %d  block column\n",j);
                        #endif 

                        /* == processing each of the remaining columns == */
                        if(j==jjj_st) tempv1 = bigV;
                        else tempv1 = bigV + full_u_cols[j-1]*nbrow;

                        arrive_at_ublock( j,&iukp,&rukp,&jb,&ljb,&nsupc,
					  iukp0,rukp0,usub,perm_u,xsup,grid );
                        cum_nrow =0 ;

                        /* do update with the kth column of L and (k,j)th block of U */
                        lptr = lptr0;
                        luptr = luptr0;

                        /* Row blocks are visited in order here, so lptr and
                           cum_nrow are simply advanced from one to the next. */
                        for (lb = 0; lb < nlb; lb++ ) {
                            ib = lsub[lptr];       /* Row block L(i,k). */
                            temp_nbrow = lsub[lptr+1];  /* Number of full rows. */
                            assert(temp_nbrow<=nbrow);

                            lptr += LB_DESCRIPTOR; /* Skip descriptor. */
#ifdef DGEMM_STAT
			    if(j==jjj_st) {
				temp_ncol = full_u_cols[j];
			    } else {
				temp_ncol = full_u_cols[j]- full_u_cols[j-1];  
			    }
			    printf("%d %d %d \n",temp_nbrow, temp_ncol,ldu);
#endif

			    /* Now gather the result into the destination block. */
			    if ( ib < jb ) {  /* A(i,j) is in U. */
#ifdef PI_DEBUG
				printf("cpu scatter \n");
				printf("A(%d,%d) goes to U block %d \n", ib,jb,ljb);
#endif

				tempv = tempv1+cum_nrow;
                                dscatter_u (
						 ib,jb,
						 nsupc,iukp,xsup,
						 klst,nbrow,
						 lptr,temp_nbrow,lsub,
						 usub,tempv,
						 Ufstnz_br_ptr,
						 Unzval_br_ptr,
						 grid
						 );
			    } else {    /* A(i,j) is in L. */
#ifdef PI_DEBUG
                                printf("cpu scatter \n");
                                printf("A(%d,%d) goes to L block %d \n", ib,jb,ljb);
#endif
                                tempv = tempv1+cum_nrow;

                                dscatter_l (
						 ib, ljb,nsupc,iukp,xsup,klst,nbrow,lptr,
						 temp_nbrow,usub,lsub,tempv,
						 indirect_thread,indirect2_thread,
						 Lrowind_bc_ptr,Lnzval_bc_ptr,grid
						 );
			    } /* if ib < jb ... */

			    lptr += temp_nbrow;
			    luptr += temp_nbrow;
			    cum_nrow += temp_nbrow;
			
			} /* for lb ... */

			luptr=luptr0;
		    } /* for j = jjj_st ... */
		}     /* else if (ncpu_blks >= omp_get_num_threads()) */
	    }         /* parallel region */

	    /* time spent in the CPU scatter above */
	    scatter_timer += SuperLU_timer_() - tstart; 
/*
 * Scatter the stream-computed part of bigV.  For each active stream i the
 * stream is synchronized with cudaStreamSynchronize and its block columns
 * [jjj_st1, jjj_end) are then shared out among the threads with "omp for".
 * The loop over i is not work-shared, so it is executed by every thread of
 * the region.
 */
#ifdef _OPENMP
#pragma omp parallel							\
    private(j,iukp,rukp, tempu, tempv, cum_nrow, jb, nsupc,ljb,		\
	    segsize,lead_zero,						\
	    ib, temp_nbrow,ilst,lib,index,				\
	    ijb,fnz,ucol,rel,ldv,lptrj,luptrj,				\
	    nzval,     lb ,                     jj, i)			\
    firstprivate(luptr,lptr) default (shared)
#endif
            {
                int thread_id = omp_get_thread_num();
        
                /* per-thread scratch index arrays */
                int* indirect_thread = indirect + ldt*thread_id;
                int* indirect2_thread = indirect2 + ldt*thread_id;
                double* tempv1;
                for(i = 0; i < num_streams_used; i++) { /* i is private variable */
                    checkCuda(cudaStreamSynchronize (streams[i]));
                    /* block-column range of stream i: stream 0 starts after
                       the ncpu_blks CPU blocks */
                    int jjj_st1 = (i==0) ? jjj_st + ncpu_blks : jjj_st + stream_end_col[i-1];
                    int jjj_end = jjj_st + stream_end_col[i];
                    /* NOTE(review): the assert below looks truncated (its
                       comparison operators are missing) -- restore it from
                       the upstream source before building. */
                    assert(jjj_end-1jjj_st) ;

                    /* now scatter it */
#pragma omp for schedule( SCHEDULE_STRATEGY ) nowait 
                    for (j = jjj_st1; j < jjj_end; ++j) {
                        /* code */
#ifdef PI_DEBUG
			printf("scattering %d  block column\n",j);
#endif 
                        /* == processing each of the remaining columns == */

                        /* start of block column j inside bigV */
                        if(j==jjj_st) tempv1 = bigV;
                        else tempv1 = bigV + full_u_cols[j-1]*nbrow;

                        arrive_at_ublock( j,&iukp,&rukp,&jb,&ljb,&nsupc,
					  iukp0,rukp0,usub,perm_u,xsup,grid );
                        cum_nrow =0 ;

                        /* do update with the kth column of L and (k,j)th block of U */
                        lptr = lptr0;
                        luptr = luptr0;
                        for (lb = 0; lb < nlb; lb++) {
                            ib = lsub[lptr];       /* Row block L(i,k). */
                            temp_nbrow = lsub[lptr+1];  /* Number of full rows. */
                            assert(temp_nbrow<=nbrow);

                            lptr += LB_DESCRIPTOR; /* Skip descriptor. */
#ifdef DGEMM_STAT
			    if(j==jjj_st) {
				temp_ncol = full_u_cols[j];
			    } else {
				temp_ncol = full_u_cols[j]- full_u_cols[j-1];  
			    }
			    printf("%d %d %d \n",temp_nbrow, temp_ncol,ldu);
#endif

                            /* Now gather the result into the destination block. */
                            if ( ib < jb ) { /* A(i,j) is in U. */
#ifdef PI_DEBUG
				printf("gpu scatter \n");
				printf("A(%d,%d) goes to U block %d \n", ib,jb,ljb);
#endif
                                tempv = tempv1+cum_nrow;
                                dscatter_u (
						 ib,jb,
						 nsupc,iukp,xsup,
						 klst,nbrow,
						 lptr,temp_nbrow,lsub,
						 usub,tempv,
						 Ufstnz_br_ptr,
						 Unzval_br_ptr,
						 grid
						 );
                            } else {    /* A(i,j) is in L. */
#ifdef PI_DEBUG
                                printf("gpu scatter \n");
                                printf("A(%d,%d) goes to L block %d \n", ib,jb,ljb);
#endif
                                tempv = tempv1+cum_nrow;

                                dscatter_l (
						 ib, ljb,nsupc,iukp,xsup,klst,nbrow,lptr,
						 temp_nbrow,usub,lsub,tempv,
						 indirect_thread,indirect2_thread,
						 Lrowind_bc_ptr,Lnzval_bc_ptr,grid
						 );
                            } /* if ib < jb ... */

                            lptr += temp_nbrow;
                            luptr += temp_nbrow;
                            cum_nrow += temp_nbrow;
			    
                        } /* for lb ... */

                        luptr=luptr0;
                    } /* for j = jjj_st ... */
                    
                } /* end for i = 0 to nstreams */
                // TAU_STATIC_TIMER_STOP("GPU_SCATTER");
                // TAU_STATIC_TIMER_STOP("INSIDE_OMP");
            } /* end pragma omp parallel */
            // TAU_STATIC_TIMER_STOP("OUTSIDE_OMP");
        }  /* end while(jjj0 */

 }   /* if msg1 and msg 2 */



SuperLU_DIST_5.3.0/SRC/zsp_blas2_dist.c0000644013363400111340000003557413233431301016367 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
 * \brief Solves one of the systems of equations A*x = b,   or   A'*x = b
 *
 * 
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ /* * File name: sp_blas2.c * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. */ #include "superlu_zdefs.h" /* * Function prototypes */ #ifndef USE_VENDOR_BLAS void zusolve(int, int, doublecomplex*, doublecomplex*); void zlsolve(int, int, doublecomplex*, doublecomplex*); void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*); #endif /*! \brief * *
 *   Purpose
 *   =======
 *
 *   sp_ztrsv() solves one of the systems of equations   
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit, or   
 *   non-unit, upper or lower triangular matrix.   
 *   No test for singularity or near-singularity is included in this   
 *   routine. Such tests must be performed before calling this routine.   
 *
 *   Parameters   
 *   ==========   
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or   
 *             lower triangular matrix as follows:   
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as   
 *             follows:   
 *                trans = 'N' or 'n'   A*x = b.   
 *                trans = 'T' or 't'   A'*x = b.   
 *                trans = 'C' or 'c'   A'*x = b.   
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit   
 *             triangular as follows:   
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
 *                diag = 'N' or 'n'   A is not assumed to be unit   
 *                                    triangular.   
 *	     
 *   L       - (input) SuperMatrix*
 *	       The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes,
 *             i.e., L has types: Stype = SC, Dtype = Z, Mtype = TRLU.
 *
 *   U       - (input) SuperMatrix*
 *	        The factor U from the factorization Pr*A*Pc=L*U.
 *	        U has types: Stype = NC, Dtype = Z, Mtype = TRU.
 *    
 *   x       - (input/output) doublecomplex*
 *             Before entry, the incremented array X must contain the n   
 *             element right-hand side vector b. On exit, X is overwritten 
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 * 
*/ int sp_ztrsv_dist(char *uplo, char *trans, char *diag, SuperMatrix *L, SuperMatrix *U, doublecomplex *x, int *info) { #ifdef _CRAY _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), ftcs3 = _cptofcd("U", strlen("U")); #endif SCformat *Lstore; NCformat *Ustore; doublecomplex *Lval, *Uval; int incx = 1, incy = 1; doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0}; doublecomplex comp_zero = {0.0, 0.0}; int nrow; int fsupc, nsupr, nsupc, luptr, istart, irow; int i, k, iptr, jcol; doublecomplex *work; flops_t solve_ops; /*extern SuperLUStat_t SuperLUStat;*/ /* Test the input parameters */ *info = 0; if ( strncmp(uplo,"L",1) != 0 && strncmp(uplo, "U", 1) != 0 ) *info = -1; else if ( strncmp(trans, "N", 1) != 0 && strncmp(trans, "T", 1) != 0 ) *info = -2; else if ( strncmp(diag, "U", 1) != 0 && strncmp(diag, "N", 1) !=0 ) *info = -3; else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; if ( *info ) { i = -(*info); xerr_dist("sp_ztrsv", &i); return 0; } Lstore = L->Store; Lval = Lstore->nzval; Ustore = U->Store; Uval = Ustore->nzval; solve_ops = 0; if ( !(work = doublecomplexCalloc_dist(L->nrow)) ) ABORT("Malloc fails for work in sp_ztrsv()."); if ( strncmp(trans, "N", 1)==0 ) { /* Form x := inv(A)*x. 
*/ if ( strncmp(uplo, "L", 1)==0 ) { /* Form x := inv(L)*x */ if ( L->nrow == 0 ) return 0; /* Quick return */ for (k = 0; k <= Lstore->nsuper; k++) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); nrow = nsupr - nsupc; solve_ops += 4 * nsupc * (nsupc - 1); solve_ops += 8 * nrow * nsupc; if ( nsupc == 1 ) { for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) { irow = L_SUB(iptr); ++luptr; zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]); z_sub(&x[irow], &x[irow], &comp_zero); } } else { #ifdef USE_VENDOR_BLAS #ifdef _CRAY CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); #else ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx, 1, 1, 1); zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy, 1); #endif #else zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]); zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc], &x[fsupc], &work[0] ); #endif iptr = istart + nsupc; for (i = 0; i < nrow; ++i, ++iptr) { irow = L_SUB(iptr); z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */ work[i] = comp_zero; } } } /* for k ... 
*/ } else { /* Form x := inv(U)*x */ if ( U->nrow == 0 ) return 0; /* Quick return */ for (k = Lstore->nsuper; k >= 0; k--) { fsupc = L_FST_SUPC(k); nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); solve_ops += 4 * nsupc * (nsupc + 1); if ( nsupc == 1 ) { slud_z_div(&x[fsupc], &x[fsupc], &Lval[luptr]); for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) { irow = U_SUB(i); zz_mult(&comp_zero, &x[fsupc], &Uval[i]); z_sub(&x[irow], &x[irow], &comp_zero); } } else { #ifdef USE_VENDOR_BLAS #ifdef _CRAY CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx, 1, 1, 1); #endif #else zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] ); #endif for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { irow = U_SUB(i); zz_mult(&comp_zero, &x[jcol], &Uval[i]); z_sub(&x[irow], &x[irow], &comp_zero); } } } } /* for k ... 
*/ } } else { /* Form x := inv(A')*x */ if ( strncmp(uplo, "L", 1)==0 ) { /* Form x := inv(L')*x */ if ( L->nrow == 0 ) return 0; /* Quick return */ for (k = Lstore->nsuper; k >= 0; --k) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); solve_ops += 8 * (nsupr - nsupc) * nsupc; for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { iptr = istart + nsupc; for (i = L_NZ_START(jcol) + nsupc; i < L_NZ_START(jcol+1); i++) { irow = L_SUB(iptr); zz_mult(&comp_zero, &x[irow], &Lval[i]); z_sub(&x[jcol], &x[jcol], &comp_zero); iptr++; } } if ( nsupc > 1 ) { solve_ops += 4 * nsupc * (nsupc - 1); #ifdef USE_VENDOR_BLAS #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("T", strlen("T")); ftcs3 = _cptofcd("U", strlen("U")); CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx, 1, 1, 1); #endif #else ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #endif } } } else { /* Form x := inv(U')*x */ if ( U->nrow == 0 ) return 0; /* Quick return */ for (k = 0; k <= Lstore->nsuper; k++) { fsupc = L_FST_SUPC(k); nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { irow = U_SUB(i); zz_mult(&comp_zero, &x[irow], &Uval[i]); z_sub(&x[jcol], &x[jcol], &comp_zero); } } solve_ops += 4 * nsupc * (nsupc + 1); if ( nsupc == 1 ) { slud_z_div(&x[fsupc], &x[fsupc], &Lval[luptr]); } else { #ifdef USE_VENDOR_BLAS #ifdef _CRAY ftcs1 = _cptofcd("U", strlen("U")); ftcs2 = _cptofcd("T", strlen("T")); ftcs3 = _cptofcd("N", strlen("N")); CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #else ztrsv_("U", "T", "N", &nsupc, 
&Lval[luptr], &nsupr, &x[fsupc], &incx, 1, 1, 1); #endif #else ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr, &x[fsupc], &incx); #endif } } /* for k ... */ } } /*SuperLUStat.ops[SOLVE] += solve_ops;*/ SUPERLU_FREE(work); return 0; } /*! \brief
  Purpose   
    =======   

    sp_zgemv()  performs one of the matrix-vector operations   
       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
    where alpha and beta are scalars, x and y are vectors and A is a
    sparse A->nrow by A->ncol matrix.   

    Parameters   
    ==========   

    TRANS  - (input) char*
             On entry, TRANS specifies the operation to be performed as   
             follows:   
                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   

    ALPHA  - (input) doublecomplex
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
             Before entry, the leading m by n part of the array A must   
             contain the matrix of coefficients.   

    X      - (input) doublecomplex*, array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   

    INCX   - (input) int
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   

    BETA   - (input) doublecomplex
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   

    Y      - (output) doublecomplex*,  array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the 
             updated vector y.
	     
    INCY   - (input) int
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   

    ==== Sparse Level 2 Blas routine.   
*/
int
sp_zgemv_dist(char *trans, doublecomplex alpha, SuperMatrix *A,
              doublecomplex *x, int incx, doublecomplex beta,
              doublecomplex *y, int incy)
{
    /*
     * y := alpha*op(A)*x + beta*y for a sparse column-compressed A, where
     * op(A) is A when trans starts with "N" and A' otherwise.  Always
     * returns 0; an illegal argument is reported through xerr_dist.
     * Only incy == 1 (no transpose) and incx == 1 (transpose) are
     * implemented; the other strides end in ABORT.
     */
    doublecomplex comp_zero = {0.0, 0.0};
    doublecomplex comp_one = {1.0, 0.0};
    NCformat *Astore = A->Store;
    doublecomplex *Aval = Astore->nzval;
    doublecomplex temp, temp1;
    int notran = (strncmp(trans, "N", 1)==0);
    int info = 0;
    int lenx, leny, i, j, irow;
    int iy, jx, jy, kx, ky;
    int beta_is_zero;

    /* Test the input parameters */
    if ( !notran && strncmp(trans, "T", 1) != 0 &&
         strncmp(trans, "C", 1) != 0 ) {
        info = 1;
    } else if ( A->nrow < 0 || A->ncol < 0 ) {
        info = 3;
    } else if ( incx == 0 ) {
        info = 5;
    } else if ( incy == 0 ) {
        info = 8;
    }
    if ( info != 0 ) {
        xerr_dist("sp_zgemv ", &info);
        return 0;
    }

    /* Quick return if possible. */
    if ( A->nrow == 0 || A->ncol == 0 ) return 0;
    if ( z_eq(&alpha, &comp_zero) && z_eq(&beta, &comp_one) ) return 0;

    /* Lengths of x and y, and the start offsets for negative strides. */
    if ( strncmp(trans, "N", 1)==0 ) {
        lenx = A->ncol;
        leny = A->nrow;
    } else {
        lenx = A->nrow;
        leny = A->ncol;
    }
    kx = (incx > 0) ? 0 : - (lenx - 1) * incx;
    ky = (incy > 0) ? 0 : - (leny - 1) * incy;

    /* First form  y := beta*y.  For incy == 1 the start offset ky is 0, so
       one strided walk covers both the unit-stride and the general case. */
    if ( !z_eq(&beta, &comp_one) ) {
        beta_is_zero = z_eq(&beta, &comp_zero);
        iy = ky;
        for (i = 0; i < leny; ++i) {
            if ( beta_is_zero ) {
                y[iy] = comp_zero;
            } else {
                zz_mult(&y[iy], &beta, &y[iy]);
            }
            iy += incy;
        }
    }

    if ( z_eq(&alpha, &comp_zero) ) return 0;

    if ( notran ) {
        /* Form  y := alpha*A*x + y, one column of A at a time. */
        jx = kx;
        if ( incy != 1 ) {
            ABORT("Not implemented.");
        } else {
            for (j = 0; j < A->ncol; ++j) {
                if ( !z_eq(&x[jx], &comp_zero) ) {
                    zz_mult(&temp, &alpha, &x[jx]);
                    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
                        irow = Astore->rowind[i];
                        zz_mult(&temp1, &temp, &Aval[i]);
                        z_add(&y[irow], &y[irow], &temp1);
                    }
                }
                jx += incx;
            }
        }
    } else {
        /* Form  y := alpha*A'*x + y: column j of A dotted with x. */
        jy = ky;
        if ( incx != 1 ) {
            ABORT("Not implemented.");
        } else {
            for (j = 0; j < A->ncol; ++j) {
                temp = comp_zero;
                for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
                    irow = Astore->rowind[i];
                    zz_mult(&temp1, &Aval[i], &x[irow]);
                    z_add(&temp, &temp, &temp1);
                }
                zz_mult(&temp1, &alpha, &temp);
                z_add(&y[jy], &y[jy], &temp1);
                jy += incy;
            }
        }
    }

    return 0;
} /* sp_zgemv */
SuperLU_DIST_5.3.0/SRC/get_perm_c.c0000644013363400111340000004236313233431301015543 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Gets matrix permutation * *
 * -- Distributed SuperLU routine (version 2.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * November 1, 2007
 * February 20, 2008
 * 
* * Last update: 7/27/2011 fix a bug with metis ordering on empty graph. * */ #include "superlu_dist_config.h" #include "superlu_ddefs.h" #include "colamd.h" void get_metis( int_t n, /* dimension of matrix B */ int_t bnz, /* number of nonzeros in matrix A. */ int_t *b_colptr, /* column pointer of size n+1 for matrix B. */ int_t *b_rowind, /* row indices of size bnz for matrix B. */ int_t *perm_c /* out - the column permutation vector. */ ) { #ifdef HAVE_PARMETIS /*#define METISOPTIONS 8*/ #define METISOPTIONS 40 int_t metis_options[METISOPTIONS]; int_t i, nm, numflag = 0; /* C-Style ordering */ int_t *perm, *iperm; int_t *b_colptr_int, *b_rowind_int; extern int check_perm_dist(char *what, int_t n, int_t *perm); extern int METIS_NodeND(int_t*, int_t*, int_t*, int_t*, int_t*, int_t*, int_t*); metis_options[0] = 0; /* Use Defaults for now */ perm = (int_t*) SUPERLU_MALLOC(2*n * sizeof(int_t)); if (!perm) ABORT("SUPERLU_MALLOC fails for perm."); iperm = perm + n; nm = n; #if 0 #if defined(_LONGINT) /* Metis can only take 32-bit integers */ if ( !(b_colptr_int = (int*) SUPERLU_MALLOC((n+1) * sizeof(int))) ) ABORT("SUPERLU_MALLOC fails for b_colptr_int."); for (i = 0; i < n+1; ++i) b_colptr_int[i] = b_colptr[i]; SUPERLU_FREE(b_colptr); if ( !(b_rowind_int = (int*) SUPERLU_MALLOC(bnz * sizeof(int))) ) ABORT("SUPERLU_MALLOC fails for b_rowind_int."); for (i = 0; i < bnz; ++i) b_rowind_int[i] = b_rowind[i]; SUPERLU_FREE(b_rowind); #else b_colptr_int = b_colptr; b_rowind_int = b_rowind; #endif #endif /* Call metis */ #undef USEEND #ifdef USEEND METIS_EdgeND(&nm, b_colptr_int, b_rowind_int, &numflag, metis_options, perm, iperm); #else /* Earlier version 3.x.x */ /* METIS_NodeND(&nm, b_colptr, b_rowind, &numflag, metis_options, perm, iperm);*/ /* Latest version 4.x.x */ METIS_NodeND(&nm, b_colptr, b_rowind, NULL, NULL, perm, iperm); /*check_perm_dist("metis perm", n, perm);*/ #endif /* Copy the permutation vector into SuperLU data structure. 
*/ for (i = 0; i < n; ++i) perm_c[i] = iperm[i]; #if 0 SUPERLU_FREE(b_colptr_int); SUPERLU_FREE(b_rowind_int); #else SUPERLU_FREE(b_colptr); SUPERLU_FREE(b_rowind); #endif SUPERLU_FREE(perm); #endif /* HAVE_PARMETIS */ } void get_colamd_dist( const int m, /* number of rows in matrix A. */ const int n, /* number of columns in matrix A. */ const int nnz,/* number of nonzeros in matrix A. */ int_t *colptr, /* column pointer of size n+1 for matrix A. */ int_t *rowind, /* row indices of size nz for matrix A. */ int_t *perm_c /* out - the column permutation vector. */ ) { int Alen, *A, i, info, *p; double knobs[COLAMD_KNOBS]; int stats[COLAMD_STATS]; Alen = colamd_recommended(nnz, m, n); colamd_set_defaults(knobs); if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) ) ABORT("Malloc fails for A[]"); if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) ) ABORT("Malloc fails for p[]"); for (i = 0; i <= n; ++i) p[i] = colptr[i]; for (i = 0; i < nnz; ++i) A[i] = rowind[i]; info = colamd(m, n, Alen, A, p, knobs, stats); if ( info == FALSE ) ABORT("COLAMD failed"); for (i = 0; i < n; ++i) perm_c[p[i]] = i; SUPERLU_FREE(A); SUPERLU_FREE(p); } /*! \brief * *
 * Purpose
 * =======
 *
 * Form the structure of A'*A. A is an m-by-n matrix in column oriented
 * format represented by (colptr, rowind). The output A'*A is in column
 * oriented format (symmetrically, also row oriented), represented by
 * (ata_colptr, ata_rowind).
 *
 * This routine is modified from GETATA routine by Tim Davis.
 * The complexity of this algorithm is: SUM_{i=1,m} r(i)^2,
 * i.e., the sum of the square of the row counts.
 *
 * Questions
 * =========
 *     o  Do I need to withhold the *dense* rows?
 *     o  How do I know the number of nonzeros in A'*A?
 * 
*/
void
getata_dist(
	    const int_t m,    /* number of rows in matrix A. */
	    const int_t n,    /* number of columns in matrix A. */
	    const int_t nz,   /* number of nonzeros in matrix A */
	    int_t *colptr,    /* column pointer of size n+1 for matrix A. */
	    int_t *rowind,    /* row indices of size nz for matrix A. */
	    int_t *atanz,     /* out - on exit, returns the actual number of
				 nonzeros in matrix A'*A. */
	    int_t **ata_colptr, /* out - size n+1 */
	    int_t **ata_rowind  /* out - size *atanz; set to NULL when
				   *atanz is 0 */
	    )
{
    register int_t i, j, k, col, num_nz, ti, trow;
    int_t *marker, *b_colptr, *b_rowind;
    int_t *t_colptr, *t_rowind; /* a column oriented form of T = A' */

    /* marker[] is used first with one slot per row of A (while building T),
       then with one slot per column (while building B). */
    if ( !(marker = (int_t*) SUPERLU_MALLOC( (SUPERLU_MAX(m,n)+1) * sizeof(int_t)) ) )
	ABORT("SUPERLU_MALLOC fails for marker[]");
    if ( !(t_colptr = (int_t*) SUPERLU_MALLOC( (m+1) * sizeof(int_t)) ) )
	ABORT("SUPERLU_MALLOC t_colptr[]");
    if ( !(t_rowind = (int_t*) SUPERLU_MALLOC( nz * sizeof(int_t)) ) )
	ABORT("SUPERLU_MALLOC fails for t_rowind[]");

    /* Get counts of each column of T, and set up column pointers */
    for (i = 0; i < m; ++i) marker[i] = 0;
    for (j = 0; j < n; ++j) {
	for (i = colptr[j]; i < colptr[j+1]; ++i)
	    ++marker[rowind[i]];
    }
    t_colptr[0] = 0;
    for (i = 0; i < m; ++i) {
	t_colptr[i+1] = t_colptr[i] + marker[i];
	marker[i] = t_colptr[i];   /* next free slot in column i of T */
    }

    /* Transpose the matrix from A to T */
    for (j = 0; j < n; ++j)
	for (i = colptr[j]; i < colptr[j+1]; ++i) {
	    col = rowind[i];
	    t_rowind[marker[col]] = j;
	    ++marker[col];
	}

    /* ----------------------------------------------------------------
       compute B = T * A, where column j of B is:

       Struct (B_*j) =    UNION   ( Struct (T_*k) )
                        A_kj != 0

       do not include the diagonal entry

       ( Partition A as: A = (A_*1, ..., A_*n)
         Then B = T * A = (T * A_*1, ..., T * A_*n), where
         T * A_*j = (T_*1, ..., T_*m) * A_*j.  )
       ---------------------------------------------------------------- */

    /* Zero the diagonal flag */
    for (i = 0; i < n; ++i) marker[i] = -1;

    /* First pass determines number of nonzeros in B */
    num_nz = 0;
    for (j = 0; j < n; ++j) {
	/* Flag the diagonal so it's not included in the B matrix */
	marker[j] = j;

	for (i = colptr[j]; i < colptr[j+1]; ++i) {
	    /* A_kj is nonzero, add pattern of column T_*k to B_*j */
	    k = rowind[i];
	    for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
		trow = t_rowind[ti];
		if ( marker[trow] != j ) {
		    marker[trow] = j;
		    num_nz++;
		}
	    }
	}
    }
    *atanz = num_nz;

    /* Allocate storage for A'*A */
    if ( !(*ata_colptr = (int_t*) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) )
	ABORT("SUPERLU_MALLOC fails for ata_colptr[]");
    if ( *atanz ) {
	if ( !(*ata_rowind = (int_t*)SUPERLU_MALLOC(*atanz*sizeof(int_t)) ) ) {
	    fprintf(stderr, ".. atanz = %lld\n", (long long) *atanz);
	    ABORT("SUPERLU_MALLOC fails for ata_rowind[]");
	}
    } else {
	/* Nothing to store: hand back a defined pointer instead of leaving
	   the caller's variable (and b_rowind below) indeterminate. */
	*ata_rowind = NULL;
    }
    b_colptr = *ata_colptr; /* aliasing */
    b_rowind = *ata_rowind;

    /* Zero the diagonal flag */
    for (i = 0; i < n; ++i) marker[i] = -1;

    /* Compute each column of B, one at a time */
    num_nz = 0;
    for (j = 0; j < n; ++j) {
	b_colptr[j] = num_nz;

	/* Flag the diagonal so it's not included in the B matrix */
	marker[j] = j;

	for (i = colptr[j]; i < colptr[j+1]; ++i) {
	    /* A_kj is nonzero, add pattern of column T_*k to B_*j */
	    k = rowind[i];
	    for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
		trow = t_rowind[ti];
		if ( marker[trow] != j ) {
		    marker[trow] = j;
		    b_rowind[num_nz++] = trow;
		}
	    }
	}
    }
    b_colptr[n] = num_nz;

    SUPERLU_FREE(marker);
    SUPERLU_FREE(t_colptr);
    SUPERLU_FREE(t_rowind);
}

/*! \brief
 *
 *
 * Purpose
 * =======
 *
 * Form the structure of A'+A. A is an n-by-n matrix in column oriented
 * format represented by (colptr, rowind). The output A'+A is in column
 * oriented format (symmetrically, also row oriented), represented by
 * (b_colptr, b_rowind).
 * 
*/
void
at_plus_a_dist(
	       const int_t n,    /* number of columns in matrix A. */
	       const int_t nz,   /* number of nonzeros in matrix A */
	       int_t *colptr,    /* column pointer of size n+1 for matrix A. */
	       int_t *rowind,    /* row indices of size nz for matrix A. */
	       int_t *bnz,       /* out - on exit, returns the actual number of
				    nonzeros in matrix A'+A. */
	       int_t **b_colptr, /* out - size n+1 */
	       int_t **b_rowind  /* out - size *bnz; set to NULL when
				    *bnz is 0 */
	       )
{
    register int_t i, j, k, col, num_nz;
    int_t *t_colptr, *t_rowind; /* a column oriented form of T = A' */
    int_t *marker;

    if ( !(marker = (int_t*) SUPERLU_MALLOC( n * sizeof(int_t)) ) )
	ABORT("SUPERLU_MALLOC fails for marker[]");
    if ( !(t_colptr = (int_t*) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) )
	ABORT("SUPERLU_MALLOC fails for t_colptr[]");
    if ( !(t_rowind = (int_t*) SUPERLU_MALLOC( nz * sizeof(int_t)) ) )
	ABORT("SUPERLU_MALLOC fails t_rowind[]");

    /* Get counts of each column of T, and set up column pointers */
    for (i = 0; i < n; ++i) marker[i] = 0;
    for (j = 0; j < n; ++j) {
	for (i = colptr[j]; i < colptr[j+1]; ++i)
	    ++marker[rowind[i]];
    }
    t_colptr[0] = 0;
    for (i = 0; i < n; ++i) {
	t_colptr[i+1] = t_colptr[i] + marker[i];
	marker[i] = t_colptr[i];   /* next free slot in column i of T */
    }

    /* Transpose the matrix from A to T */
    for (j = 0; j < n; ++j) {
	for (i = colptr[j]; i < colptr[j+1]; ++i) {
	    col = rowind[i];
	    t_rowind[marker[col]] = j;
	    ++marker[col];
	}
    }

    /* ----------------------------------------------------------------
       compute B = A + T, where column j of B is:

       Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k)

       do not include the diagonal entry
       ---------------------------------------------------------------- */

    /* Zero the diagonal flag */
    for (i = 0; i < n; ++i) marker[i] = -1;

    /* First pass determines number of nonzeros in B */
    num_nz = 0;
    for (j = 0; j < n; ++j) {
	/* Flag the diagonal so it's not included in the B matrix */
	marker[j] = j;

	/* Add pattern of column A_*k to B_*j */
	for (i = colptr[j]; i < colptr[j+1]; ++i) {
	    k = rowind[i];
	    if ( marker[k] != j ) {
		marker[k] = j;
		++num_nz;
	    }
	}

	/* Add pattern of column T_*k to B_*j */
	for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
	    k = t_rowind[i];
	    if ( marker[k] != j ) {
		marker[k] = j;
		++num_nz;
	    }
	}
    }
    *bnz = num_nz;

    /* Allocate storage for A+A' */
    if ( !(*b_colptr = (int_t*) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) )
	ABORT("SUPERLU_MALLOC fails for b_colptr[]");
    if ( *bnz ) {
	if ( !(*b_rowind = (int_t*) SUPERLU_MALLOC( *bnz * sizeof(int_t)) ) )
	    ABORT("SUPERLU_MALLOC fails for b_rowind[]");
    } else {
	/* Nothing to store (e.g. a diagonal matrix): give the caller a
	   defined pointer rather than leaving it indeterminate. */
	*b_rowind = NULL;
    }

    /* Zero the diagonal flag */
    for (i = 0; i < n; ++i) marker[i] = -1;

    /* Compute each column of B, one at a time */
    num_nz = 0;
    for (j = 0; j < n; ++j) {
	(*b_colptr)[j] = num_nz;

	/* Flag the diagonal so it's not included in the B matrix */
	marker[j] = j;

	/* Add pattern of column A_*k to B_*j */
	for (i = colptr[j]; i < colptr[j+1]; ++i) {
	    k = rowind[i];
	    if ( marker[k] != j ) {
		marker[k] = j;
		(*b_rowind)[num_nz++] = k;
	    }
	}

	/* Add pattern of column T_*k to B_*j */
	for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
	    k = t_rowind[i];
	    if ( marker[k] != j ) {
		marker[k] = j;
		(*b_rowind)[num_nz++] = k;
	    }
	}
    }
    (*b_colptr)[n] = num_nz;

    SUPERLU_FREE(marker);
    SUPERLU_FREE(t_colptr);
    SUPERLU_FREE(t_rowind);
} /* at_plus_a_dist */

/*! \brief
 *
 *
 * Purpose
 * =======
 *
 * GET_PERM_C_DIST obtains a permutation matrix Pc, by applying the multiple
 * minimum degree ordering code by Joseph Liu to matrix A'*A or A+A',
 * or using approximate minimum degree column ordering by Davis et al.
 * The LU factorization of A*Pc tends to have less fill than the LU 
 * factorization of A.
 *
 * Arguments
 * =========
 *
 * ispec   (input) colperm_t
 *         Specifies what type of column permutation to use to reduce fill.
 *         = NATURAL: natural ordering (i.e., Pc = I)
 *         = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A
 *         = MMD_ATA: minimum degree ordering on structure of A'*A
 *         = METIS_AT_PLUS_A: METIS ordering on A'+A
 *         = COLAMD: approximate minimum degree column ordering
 * 
 * A       (input) SuperMatrix*
 *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
 *         of the linear equations is A->nrow. Currently, the type of A 
 *         can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
 *         In the future, more general A can be handled.
 *
 * perm_c  (output) int*
 *	   Column permutation vector of size A->ncol, which defines the 
 *         permutation matrix Pc; perm_c[i] = j means column i of A is 
 *         in position j in A*Pc.
 * 
*/ void get_perm_c_dist(int_t pnum, int_t ispec, SuperMatrix *A, int_t *perm_c) { NCformat *Astore = A->Store; int_t m, n, bnz = 0, *b_colptr, *b_rowind, i; int_t delta, maxint, nofsub, *invp; int_t *dhead, *qsize, *llist, *marker; double t, SuperLU_timer_(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC((int)pnum, "Enter get_perm_c_dist()"); #endif m = A->nrow; n = A->ncol; t = SuperLU_timer_(); switch ( ispec ) { case NATURAL: /* Natural ordering */ for (i = 0; i < n; ++i) perm_c[i] = i; #if ( PRNTlevel>=1 ) if ( !pnum ) printf(".. Use natural column ordering\n"); #endif return; case MMD_AT_PLUS_A: /* Minimum degree ordering on A'+A */ if ( m != n ) ABORT("Matrix is not square"); at_plus_a_dist(n, Astore->nnz, Astore->colptr, Astore->rowind, &bnz, &b_colptr, &b_rowind); t = SuperLU_timer_() - t; /*printf("Form A'+A time = %8.3f\n", t);*/ #if ( PRNTlevel>=1 ) if ( !pnum ) printf(".. Use minimum degree ordering on A'+A.\n"); #endif break; case MMD_ATA: /* Minimum degree ordering on A'*A */ getata_dist(m, n, Astore->nnz, Astore->colptr, Astore->rowind, &bnz, &b_colptr, &b_rowind); t = SuperLU_timer_() - t; /*printf("Form A'*A time = %8.3f\n", t);*/ #if ( PRNTlevel>=1 ) if ( !pnum ) printf(".. Use minimum degree ordering on A'*A\n"); #endif break; case (COLAMD): /* Approximate minimum degree column ordering. */ get_colamd_dist(m, n, Astore->nnz, Astore->colptr, Astore->rowind, perm_c); #if ( PRNTlevel>=1 ) printf(".. Use approximate minimum degree column ordering.\n"); #endif return; #ifdef HAVE_PARMETIS case METIS_AT_PLUS_A: /* METIS ordering on A'+A */ if ( m != n ) ABORT("Matrix is not square"); at_plus_a_dist(n, Astore->nnz, Astore->colptr, Astore->rowind, &bnz, &b_colptr, &b_rowind); if ( bnz ) { /* non-empty adjacency structure */ get_metis(n, bnz, b_colptr, b_rowind, perm_c); } else { /* e.g., diagonal matrix */ for (i = 0; i < n; ++i) perm_c[i] = i; SUPERLU_FREE(b_colptr); /* b_rowind is not allocated in this case */ } #if ( PRNTlevel>=1 ) if ( !pnum ) printf(".. 
Use METIS ordering on A'+A\n"); #endif return; #endif default: ABORT("Invalid ISPEC"); } if ( bnz ) { t = SuperLU_timer_(); /* Initialize and allocate storage for GENMMD. */ delta = 0; /* DELTA is a parameter to allow the choice of nodes whose degree <= min-degree + DELTA. */ maxint = 2147483647; /* 2**31 - 1 */ invp = (int_t *) SUPERLU_MALLOC((n+delta)*sizeof(int_t)); if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp."); dhead = (int_t *) SUPERLU_MALLOC((n+delta)*sizeof(int_t)); if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead."); qsize = (int_t *) SUPERLU_MALLOC((n+delta)*sizeof(int_t)); if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize."); llist = (int_t *) SUPERLU_MALLOC(n*sizeof(int_t)); if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist."); marker = (int_t *) SUPERLU_MALLOC(n*sizeof(int_t)); if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker."); /* Transform adjacency list into 1-based indexing required by GENMMD.*/ for (i = 0; i <= n; ++i) ++b_colptr[i]; for (i = 0; i < bnz; ++i) ++b_rowind[i]; genmmd_dist_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead, qsize, llist, marker, &maxint, &nofsub); /* Transform perm_c into 0-based indexing. */ for (i = 0; i < n; ++i) --perm_c[i]; SUPERLU_FREE(invp); SUPERLU_FREE(dhead); SUPERLU_FREE(qsize); SUPERLU_FREE(llist); SUPERLU_FREE(marker); SUPERLU_FREE(b_rowind); t = SuperLU_timer_() - t; /* printf("call GENMMD time = %8.3f\n", t);*/ } else { /* Empty adjacency structure */ for (i = 0; i < n; ++i) perm_c[i] = i; } SUPERLU_FREE(b_colptr); #if ( DEBUGlevel>=1 ) CHECK_MALLOC((int) pnum, "Exit get_perm_c_dist()"); #endif } /* get_perm_c_dist */ SuperLU_DIST_5.3.0/SRC/pdutil.c0000644013363400111340000004430513233431301014736 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Several matrix utilities * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief Gather A from the distributed compressed row format to global A in compressed column format. */ int pdCompRow_loc_to_CompCol_global ( int_t need_value, /* Input. Whether need to gather numerical values */ SuperMatrix *A, /* Input. Distributed matrix in NRformat_loc format. */ gridinfo_t *grid, /* Input */ SuperMatrix *GA /* Output */ ) { NRformat_loc *Astore; NCformat *GAstore; double *a, *a_loc; int_t *colind, *rowptr; int_t *colptr_loc, *rowind_loc; int_t m_loc, n, i, j, k, l; int_t colnnz, fst_row, nnz_loc, nnz; double *a_recv; /* Buffer to receive the blocks of values. */ double *a_buf; /* Buffer to merge blocks into block columns. */ int_t *itemp; int_t *colptr_send; /* Buffer to redistribute the column pointers of the local block rows. Use n_loc+1 pointers for each block. */ int_t *colptr_blk; /* The column pointers for each block, after redistribution to the local block columns. Use n_loc+1 pointers for each block. */ int_t *rowind_recv; /* Buffer to receive the blocks of row indices. */ int_t *rowind_buf; /* Buffer to merge blocks into block columns. */ int_t *fst_rows, *n_locs; int *sendcnts, *sdispls, *recvcnts, *rdispls, *itemp_32; int it, n_loc, procs; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pdCompRow_loc_to_CompCol_global"); #endif /* Initialization. */ n = A->ncol; Astore = (NRformat_loc *) A->Store; nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; fst_row = Astore->fst_row; a = Astore->nzval; rowptr = Astore->rowptr; colind = Astore->colind; n_loc = m_loc; /* NOTE: CURRENTLY ONLY WORK FOR SQUARE MATRIX */ /* ------------------------------------------------------------ FIRST PHASE: TRANSFORM A INTO DISTRIBUTED COMPRESSED COLUMN. ------------------------------------------------------------*/ dCompRow_to_CompCol_dist(m_loc, n, nnz_loc, a, colind, rowptr, &a_loc, &rowind_loc, &colptr_loc); /* Change local row index numbers to global numbers. 
*/ for (i = 0; i < nnz_loc; ++i) rowind_loc[i] += fst_row; #if ( DEBUGlevel>=2 ) printf("Proc %d\n", grid->iam); PrintInt10("rowind_loc", nnz_loc, rowind_loc); PrintInt10("colptr_loc", n+1, colptr_loc); #endif procs = grid->nprow * grid->npcol; if ( !(fst_rows = (int_t *) intMalloc_dist(2*procs)) ) ABORT("Malloc fails for fst_rows[]"); n_locs = fst_rows + procs; MPI_Allgather(&fst_row, 1, mpi_int_t, fst_rows, 1, mpi_int_t, grid->comm); for (i = 0; i < procs-1; ++i) n_locs[i] = fst_rows[i+1] - fst_rows[i]; n_locs[procs-1] = n - fst_rows[procs-1]; if ( !(recvcnts = SUPERLU_MALLOC(5*procs * sizeof(int))) ) ABORT("Malloc fails for recvcnts[]"); sendcnts = recvcnts + procs; rdispls = sendcnts + procs; sdispls = rdispls + procs; itemp_32 = sdispls + procs; /* All-to-all transfer column pointers of each block. Now the matrix view is P-by-P block-partition. */ /* n column starts for each column, and procs column ends for each block */ if ( !(colptr_send = intMalloc_dist(n + procs)) ) ABORT("Malloc fails for colptr_send[]"); if ( !(colptr_blk = intMalloc_dist( (((size_t) n_loc)+1)*procs)) ) ABORT("Malloc fails for colptr_blk[]"); for (i = 0, j = 0; i < procs; ++i) { for (k = j; k < j + n_locs[i]; ++k) colptr_send[i+k] = colptr_loc[k]; colptr_send[i+k] = colptr_loc[k]; /* Add an END marker */ sendcnts[i] = n_locs[i] + 1; #if ( DEBUGlevel>=1 ) assert(j == fst_rows[i]); #endif sdispls[i] = j + i; recvcnts[i] = n_loc + 1; rdispls[i] = i * (n_loc + 1); j += n_locs[i]; /* First column of next block in colptr_loc[] */ } MPI_Alltoallv(colptr_send, sendcnts, sdispls, mpi_int_t, colptr_blk, recvcnts, rdispls, mpi_int_t, grid->comm); /* Adjust colptr_blk[] so that they contain the local indices of the column pointers in the receive buffer. 
*/ nnz = 0; /* The running sum of the nonzeros counted by far */ k = 0; for (i = 0; i < procs; ++i) { for (j = rdispls[i]; j < rdispls[i] + n_loc; ++j) { colnnz = colptr_blk[j+1] - colptr_blk[j]; /*assert(k<=j);*/ colptr_blk[k] = nnz; nnz += colnnz; /* Start of the next column */ ++k; } colptr_blk[k++] = nnz; /* Add an END marker for each block */ } /*assert(k == (n_loc+1)*procs);*/ /* Now prepare to transfer row indices and values. */ sdispls[0] = 0; for (i = 0; i < procs-1; ++i) { sendcnts[i] = colptr_loc[fst_rows[i+1]] - colptr_loc[fst_rows[i]]; sdispls[i+1] = sdispls[i] + sendcnts[i]; } sendcnts[procs-1] = colptr_loc[n] - colptr_loc[fst_rows[procs-1]]; for (i = 0; i < procs; ++i) { j = rdispls[i]; /* Point to this block in colptr_blk[]. */ recvcnts[i] = colptr_blk[j+n_loc] - colptr_blk[j]; } rdispls[0] = 0; /* Recompute rdispls[] for row indices. */ for (i = 0; i < procs-1; ++i) rdispls[i+1] = rdispls[i] + recvcnts[i]; k = rdispls[procs-1] + recvcnts[procs-1]; /* Total received */ if ( !(rowind_recv = (int_t *) intMalloc_dist(2*k)) ) ABORT("Malloc fails for rowind_recv[]"); rowind_buf = rowind_recv + k; MPI_Alltoallv(rowind_loc, sendcnts, sdispls, mpi_int_t, rowind_recv, recvcnts, rdispls, mpi_int_t, grid->comm); if ( need_value ) { if ( !(a_recv = (double *) doubleMalloc_dist(2*k)) ) ABORT("Malloc fails for rowind_recv[]"); a_buf = a_recv + k; MPI_Alltoallv(a_loc, sendcnts, sdispls, MPI_DOUBLE, a_recv, recvcnts, rdispls, MPI_DOUBLE, grid->comm); } /* Reset colptr_loc[] to point to the n_loc global columns. */ colptr_loc[0] = 0; itemp = colptr_send; for (j = 0; j < n_loc; ++j) { colnnz = 0; for (i = 0; i < procs; ++i) { k = i * (n_loc + 1) + j; /* j-th column in i-th block */ colnnz += colptr_blk[k+1] - colptr_blk[k]; } colptr_loc[j+1] = colptr_loc[j] + colnnz; itemp[j] = colptr_loc[j]; /* Save a copy of the column starts */ } itemp[n_loc] = colptr_loc[n_loc]; /* Merge blocks of row indices into columns of row indices. 
*/ for (i = 0; i < procs; ++i) { k = i * (n_loc + 1); for (j = 0; j < n_loc; ++j) { /* i-th block */ for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) { rowind_buf[itemp[j]] = rowind_recv[l]; ++itemp[j]; } } } if ( need_value ) { for (j = 0; j < n_loc+1; ++j) itemp[j] = colptr_loc[j]; for (i = 0; i < procs; ++i) { k = i * (n_loc + 1); for (j = 0; j < n_loc; ++j) { /* i-th block */ for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) { a_buf[itemp[j]] = a_recv[l]; ++itemp[j]; } } } } /* ------------------------------------------------------------ SECOND PHASE: GATHER TO GLOBAL A IN COMPRESSED COLUMN FORMAT. ------------------------------------------------------------*/ GA->nrow = A->nrow; GA->ncol = A->ncol; GA->Stype = SLU_NC; GA->Dtype = A->Dtype; GA->Mtype = A->Mtype; GAstore = GA->Store = (NCformat *) SUPERLU_MALLOC ( sizeof(NCformat) ); if ( !GAstore ) ABORT ("SUPERLU_MALLOC fails for GAstore"); /* First gather the size of each piece. */ nnz_loc = colptr_loc[n_loc]; MPI_Allgather(&nnz_loc, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); for (i = 0, nnz = 0; i < procs; ++i) nnz += itemp[i]; GAstore->nnz = nnz; if ( !(GAstore->rowind = (int_t *) intMalloc_dist (nnz)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->rowind[]"); if ( !(GAstore->colptr = (int_t *) intMalloc_dist (n+1)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->colptr[]"); /* Allgatherv for row indices. */ rdispls[0] = 0; for (i = 0; i < procs-1; ++i) { rdispls[i+1] = rdispls[i] + itemp[i]; itemp_32[i] = itemp[i]; } itemp_32[procs-1] = itemp[procs-1]; it = nnz_loc; MPI_Allgatherv(rowind_buf, it, mpi_int_t, GAstore->rowind, itemp_32, rdispls, mpi_int_t, grid->comm); if ( need_value ) { if ( !(GAstore->nzval = (double *) doubleMalloc_dist (nnz)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->rnzval[]"); MPI_Allgatherv(a_buf, it, MPI_DOUBLE, GAstore->nzval, itemp_32, rdispls, MPI_DOUBLE, grid->comm); } else GAstore->nzval = NULL; /* Now gather the column pointers. 
*/ rdispls[0] = 0; for (i = 0; i < procs-1; ++i) { rdispls[i+1] = rdispls[i] + n_locs[i]; itemp_32[i] = n_locs[i]; } itemp_32[procs-1] = n_locs[procs-1]; MPI_Allgatherv(colptr_loc, n_loc, mpi_int_t, GAstore->colptr, itemp_32, rdispls, mpi_int_t, grid->comm); /* Recompute column pointers. */ for (i = 1; i < procs; ++i) { k = rdispls[i]; for (j = 0; j < n_locs[i]; ++j) GAstore->colptr[k++] += itemp[i-1]; itemp[i] += itemp[i-1]; /* prefix sum */ } GAstore->colptr[n] = nnz; #if ( DEBUGlevel>=2 ) if ( !grid->iam ) { printf("After pdCompRow_loc_to_CompCol_global()\n"); dPrint_CompCol_Matrix_dist(GA); } #endif SUPERLU_FREE(a_loc); SUPERLU_FREE(rowind_loc); SUPERLU_FREE(colptr_loc); SUPERLU_FREE(fst_rows); SUPERLU_FREE(recvcnts); SUPERLU_FREE(colptr_send); SUPERLU_FREE(colptr_blk); SUPERLU_FREE(rowind_recv); if ( need_value) SUPERLU_FREE(a_recv); #if ( DEBUGlevel>=1 ) if ( !grid->iam ) printf("sizeof(NCformat) %lu\n", sizeof(NCformat)); CHECK_MALLOC(grid->iam, "Exit pdCompRow_loc_to_CompCol_global"); #endif return 0; } /* pdCompRow_loc_to_CompCol_global */ /*! \brief Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-th row of B. 
*/ int pdPermute_Dense_Matrix ( int_t fst_row, int_t m_loc, int_t row_to_proc[], int_t perm[], double X[], int ldx, double B[], int ldb, int nrhs, gridinfo_t *grid ) { int_t i, j, k, l; int p, procs; int *sendcnts, *sendcnts_nrhs, *recvcnts, *recvcnts_nrhs; int *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs; int *ptr_to_ibuf, *ptr_to_dbuf; int_t *send_ibuf, *recv_ibuf; double *send_dbuf, *recv_dbuf; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pdPermute_Dense_Matrix()"); #endif procs = grid->nprow * grid->npcol; if ( !(sendcnts = SUPERLU_MALLOC(10*procs * sizeof(int))) ) ABORT("Malloc fails for sendcnts[]."); sendcnts_nrhs = sendcnts + procs; recvcnts = sendcnts_nrhs + procs; recvcnts_nrhs = recvcnts + procs; sdispls = recvcnts_nrhs + procs; sdispls_nrhs = sdispls + procs; rdispls = sdispls_nrhs + procs; rdispls_nrhs = rdispls + procs; ptr_to_ibuf = rdispls_nrhs + procs; ptr_to_dbuf = ptr_to_ibuf + procs; for (i = 0; i < procs; ++i) sendcnts[i] = 0; /* Count the number of X entries to be sent to each process.*/ for (i = fst_row; i < fst_row + m_loc; ++i) { p = row_to_proc[perm[i]]; ++sendcnts[p]; } MPI_Alltoall(sendcnts, 1, MPI_INT, recvcnts, 1, MPI_INT, grid->comm); sdispls[0] = rdispls[0] = 0; sdispls_nrhs[0] = rdispls_nrhs[0] = 0; sendcnts_nrhs[0] = sendcnts[0] * nrhs; recvcnts_nrhs[0] = recvcnts[0] * nrhs; for (i = 1; i < procs; ++i) { sdispls[i] = sdispls[i-1] + sendcnts[i-1]; sdispls_nrhs[i] = sdispls[i] * nrhs; rdispls[i] = rdispls[i-1] + recvcnts[i-1]; rdispls_nrhs[i] = rdispls[i] * nrhs; sendcnts_nrhs[i] = sendcnts[i] * nrhs; recvcnts_nrhs[i] = recvcnts[i] * nrhs; } k = sdispls[procs-1] + sendcnts[procs-1];/* Total number of sends */ l = rdispls[procs-1] + recvcnts[procs-1];/* Total number of recvs */ /*assert(k == m_loc);*/ /*assert(l == m_loc);*/ if ( !(send_ibuf = intMalloc_dist(k + l)) ) ABORT("Malloc fails for send_ibuf[]."); recv_ibuf = send_ibuf + k; if ( !(send_dbuf = doubleMalloc_dist((k + l)*nrhs)) ) ABORT("Malloc fails for 
send_dbuf[]."); recv_dbuf = send_dbuf + k * nrhs; for (i = 0; i < procs; ++i) { ptr_to_ibuf[i] = sdispls[i]; ptr_to_dbuf[i] = sdispls_nrhs[i]; } /* Fill in the send buffers: send_ibuf[] and send_dbuf[]. */ for (i = fst_row; i < fst_row + m_loc; ++i) { j = perm[i]; p = row_to_proc[j]; send_ibuf[ptr_to_ibuf[p]] = j; j = ptr_to_dbuf[p]; RHS_ITERATE(k) { /* RHS stored in row major in the buffer */ send_dbuf[j++] = X[i-fst_row + k*ldx]; } ++ptr_to_ibuf[p]; ptr_to_dbuf[p] += nrhs; } /* Transfer the (permuted) row indices and numerical values. */ MPI_Alltoallv(send_ibuf, sendcnts, sdispls, mpi_int_t, recv_ibuf, recvcnts, rdispls, mpi_int_t, grid->comm); MPI_Alltoallv(send_dbuf, sendcnts_nrhs, sdispls_nrhs, MPI_DOUBLE, recv_dbuf, recvcnts_nrhs, rdispls_nrhs, MPI_DOUBLE, grid->comm); /* Copy the buffer into b. */ for (i = 0, l = 0; i < m_loc; ++i) { j = recv_ibuf[i] - fst_row; /* Relative row number */ RHS_ITERATE(k) { /* RHS stored in row major in the buffer */ B[j + k*ldb] = recv_dbuf[l++]; } } SUPERLU_FREE(sendcnts); SUPERLU_FREE(send_ibuf); SUPERLU_FREE(send_dbuf); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Exit pdPermute_Dense_Matrix()"); #endif return 0; } /* pdPermute_Dense_Matrix */ /*! \brief Initialize the data structure for the solution phase. 
*/ int dSolveInit(superlu_dist_options_t *options, SuperMatrix *A, int_t perm_r[], int_t perm_c[], int_t nrhs, LUstruct_t *LUstruct, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct) { int_t *row_to_proc, *inv_perm_c, *itemp; NRformat_loc *Astore; int_t i, fst_row, m_loc, p; int procs; Astore = (NRformat_loc *) A->Store; fst_row = Astore->fst_row; m_loc = Astore->m_loc; procs = grid->nprow * grid->npcol; if ( !(row_to_proc = intMalloc_dist(A->nrow)) ) ABORT("Malloc fails for row_to_proc[]"); SOLVEstruct->row_to_proc = row_to_proc; if ( !(inv_perm_c = intMalloc_dist(A->ncol)) ) ABORT("Malloc fails for inv_perm_c[]."); for (i = 0; i < A->ncol; ++i) inv_perm_c[perm_c[i]] = i; SOLVEstruct->inv_perm_c = inv_perm_c; /* ------------------------------------------------------------ EVERY PROCESS NEEDS TO KNOW GLOBAL PARTITION. SET UP THE MAPPING BETWEEN ROWS AND PROCESSES. NOTE: For those processes that do not own any row, it must must be set so that fst_row == A->nrow. ------------------------------------------------------------*/ if ( !(itemp = intMalloc_dist(procs+1)) ) ABORT("Malloc fails for itemp[]"); MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); itemp[procs] = A->nrow; for (p = 0; p < procs; ++p) { for (i = itemp[p] ; i < itemp[p+1]; ++i) row_to_proc[i] = p; } #if ( DEBUGlevel>=2 ) if ( !grid->iam ) { printf("fst_row = %d\n", fst_row); PrintInt10("row_to_proc", A->nrow, row_to_proc); PrintInt10("inv_perm_c", A->ncol, inv_perm_c); } #endif SUPERLU_FREE(itemp); #if 0 /* Compute the mapping between rows and processes. */ /* XSL NOTE: What happens if # of mapped processes is smaller than total Procs? For the processes without any row, let fst_row be EMPTY (-1). Make sure this case works! 
*/ MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); itemp[procs] = n; for (p = 0; p < procs; ++p) { j = itemp[p]; if ( j != EMPTY ) { k = itemp[p+1]; if ( k == EMPTY ) k = n; for (i = j ; i < k; ++i) row_to_proc[i] = p; } } #endif get_diag_procs(A->ncol, LUstruct->Glu_persist, grid, &SOLVEstruct->num_diag_procs, &SOLVEstruct->diag_procs, &SOLVEstruct->diag_len); /* Setup communication pattern for redistribution of B and X. */ if ( !(SOLVEstruct->gstrs_comm = (pxgstrs_comm_t *) SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) ) ABORT("Malloc fails for gstrs_comm[]"); pxgstrs_init(A->ncol, m_loc, nrhs, fst_row, perm_r, perm_c, grid, LUstruct->Glu_persist, SOLVEstruct); if ( !(SOLVEstruct->gsmv_comm = (pdgsmv_comm_t *) SUPERLU_MALLOC(sizeof(pdgsmv_comm_t))) ) ABORT("Malloc fails for gsmv_comm[]"); SOLVEstruct->A_colind_gsmv = NULL; options->SolveInitialized = YES; return 0; } /* dSolveInit */ /*! \brief Release the resources used for the solution phase. */ void dSolveFinalize(superlu_dist_options_t *options, SOLVEstruct_t *SOLVEstruct) { int_t *it; pxgstrs_finalize(SOLVEstruct->gstrs_comm); if ( options->RefineInitialized ) { pdgsmv_finalize(SOLVEstruct->gsmv_comm); options->RefineInitialized = NO; } SUPERLU_FREE(SOLVEstruct->gsmv_comm); SUPERLU_FREE(SOLVEstruct->row_to_proc); SUPERLU_FREE(SOLVEstruct->inv_perm_c); SUPERLU_FREE(SOLVEstruct->diag_procs); SUPERLU_FREE(SOLVEstruct->diag_len); if ( it = SOLVEstruct->A_colind_gsmv ) SUPERLU_FREE(it); options->SolveInitialized = NO; } /* dSolveFinalize */ /*! 
\brief Check the inf-norm of the error vector */ void pdinf_norm_error(int iam, int_t n, int_t nrhs, double x[], int_t ldx, double xtrue[], int_t ldxtrue, gridinfo_t *grid) { double err, xnorm, temperr, tempxnorm; double *x_work, *xtrue_work; int i, j; for (j = 0; j < nrhs; j++) { x_work = &x[j*ldx]; xtrue_work = &xtrue[j*ldxtrue]; err = xnorm = 0.0; for (i = 0; i < n; i++) { err = SUPERLU_MAX(err, fabs(x_work[i] - xtrue_work[i])); xnorm = SUPERLU_MAX(xnorm, fabs(x_work[i])); } /* get the golbal max err & xnrom */ temperr = err; tempxnorm = xnorm; MPI_Allreduce( &temperr, &err, 1, MPI_DOUBLE, MPI_MAX, grid->comm); MPI_Allreduce( &tempxnorm, &xnorm, 1, MPI_DOUBLE, MPI_MAX, grid->comm); err = err / xnorm; if ( !iam ) printf("\tSol %2d: ||X-Xtrue||/||X|| = %e\n", j, err); } } SuperLU_DIST_5.3.0/SRC/dsp_blas2_dist.c0000644013363400111340000003447513233431301016340 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Sparse BLAS 2, using some dense BLAS 2 operations * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ /* * File name: sp_blas2.c * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. */ #include "superlu_ddefs.h" /* * Function prototypes */ #ifndef USE_VENDOR_BLAS extern void dusolve(int, int, double*, double*); extern void dlsolve(int, int, double*, double*); extern void dmatvec(int, int, int, double*, double*, double*); #endif /*! \brief * *
 *   Purpose
 *   =======
 *
 *   sp_dtrsv_dist() solves one of the systems of equations   
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit or
 *   non-unit, upper or lower triangular matrix.
 *   No test for singularity or near-singularity is included in this   
 *   routine. Such tests must be performed before calling this routine.   
 *
 *   Parameters   
 *   ==========   
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or   
 *             lower triangular matrix as follows:   
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as   
 *             follows:   
 *                trans = 'N' or 'n'   A*x = b.   
 *                trans = 'T' or 't'   A'*x = b.   
 *                trans = 'C' or 'c'   A'*x = b.   
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit   
 *             triangular as follows:   
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
 *                diag = 'N' or 'n'   A is not assumed to be unit   
 *                                    triangular.   
 *	     
 *   L       - (input) SuperMatrix*
 *	       The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes, i.e.,
 *             L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
 *
 *   U       - (input) SuperMatrix*
 *	        The factor U from the factorization Pr*A*Pc=L*U.
 *	        U has types: Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
 *    
 *   x       - (input/output) double*
 *             Before entry, the array x must contain the n element
 *             right-hand side vector b. On exit, x is overwritten
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 * 
 */
int
sp_dtrsv_dist(char *uplo, char *trans, char *diag, SuperMatrix *L, 
	      SuperMatrix *U, double *x, int *info)
{
    /*
     * Implementation notes
     * --------------------
     * - Only the first character of uplo/trans/diag is examined, in
     *   either case.  trans = 'C' is handled exactly like 'T'.
     * - diag is validated but not consulted afterwards: the L solves
     *   always use a unit diagonal and the U solves always divide by the
     *   diagonal, which is read from the supernode's dense block
     *   (Lval[luptr]).
     * - On an illegal argument xerr_dist() is called, *info holds the
     *   negated argument position, and the function returns 0.
     * - The function always returns 0; callers must inspect *info.
     */
#ifdef _CRAY
    _fcd ftcs1, ftcs2, ftcs3;
#endif
    /* NOTE(review): Lstore/Ustore are presumably referenced by name from
       the L_xxx()/U_xxx() accessor macros used below -- do not rename. */
    SCformat *Lstore;
    NCformat *Ustore;
    double   *Lval, *Uval;
    int incx = 1, incy = 1;
    double alpha = 1.0, beta = 1.0;
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
    int lower, notran;
    double *work;
    flops_t solve_ops;
    /*extern SuperLUStat_t SuperLUStat;*/

    /* Test the input parameters (first character only, either case). */
    *info = 0;
    lower  = ( uplo[0] == 'L' || uplo[0] == 'l' );
    notran = ( trans[0] == 'N' || trans[0] == 'n' );
    if ( !lower && uplo[0] != 'U' && uplo[0] != 'u' ) *info = -1;
    else if ( !notran && trans[0] != 'T' && trans[0] != 't'
	      && trans[0] != 'C' && trans[0] != 'c' )
	*info = -2;
    else if ( diag[0] != 'U' && diag[0] != 'u'
	      && diag[0] != 'N' && diag[0] != 'n' )
	*info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
	i = -(*info);
	xerr_dist("sp_dtrsv_dist", &i);
	return 0;
    }

    /* Quick return for an empty system.  This is done before the work
       array is allocated so that the early exit cannot leak it. */
    if ( (lower ? L->nrow : U->nrow) == 0 ) return 0;

    Lstore = (SCformat *) L->Store;
    Lval = (double *) Lstore->nzval;
    Ustore = (NCformat *) U->Store;
    Uval = (double *) Ustore->nzval;
    solve_ops = 0;

    /* Zero-initialized scratch vector; it is only written by the
       forward L solve, which resets each entry after scattering. */
    if ( !(work = doubleCalloc_dist(L->nrow)) )
	ABORT("Malloc fails for work in sp_dtrsv_dist().");
    
    if ( notran ) {	/* Form x := inv(A)*x. */
	
	if ( lower ) {
	    /* Form x := inv(L)*x : sweep the supernodes forward. */
	    for (k = 0; k <= Lstore->nsuper; k++) {
		fsupc = L_FST_SUPC(k);
		istart = L_SUB_START(fsupc);
		nsupr = L_SUB_START(fsupc+1) - istart;
		nsupc = L_FST_SUPC(k+1) - fsupc;
		luptr = L_NZ_START(fsupc);
		nrow = nsupr - nsupc;	/* rows below the diagonal block */

	        solve_ops += nsupc * (nsupc - 1);
	        solve_ops += 2 * nrow * nsupc;

		if ( nsupc == 1 ) {
		    /* Single column: unit diagonal, so only the update of
		       the rows below the diagonal is needed. */
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
			irow = L_SUB(iptr);
			++luptr;
			x[irow] -= x[fsupc] * Lval[luptr];
		    }
		} else {
		    /* Dense triangular solve with the diagonal block,
		       then work := (off-diagonal block) * x(supernode). */
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    ftcs1 = _cptofcd("L", strlen("L"));
		    ftcs2 = _cptofcd("N", strlen("N"));
		    ftcs3 = _cptofcd("U", strlen("U"));
		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
		    dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx, 1, 1, 1);
		
		    dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy, 1);
#endif /* _CRAY */		
#else
		    dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
		
		    dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
			&x[fsupc], &work[0] );
#endif		
		
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; ++i, ++iptr) {
			irow = L_SUB(iptr);
			x[irow] -= work[i];	/* Scatter */
			work[i] = 0.0;		/* keep work zeroed for reuse */
		    }
	 	}
	    } /* for k ... */
	    
	} else {
	    /* Form x := inv(U)*x : sweep the supernodes backward. */
	    for (k = Lstore->nsuper; k >= 0; k--) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);
		
    	        solve_ops += nsupc * (nsupc + 1);

		if ( nsupc == 1 ) {
		    /* The diagonal entry is read from the supernode's
		       dense block. */
		    x[fsupc] /= Lval[luptr];
		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
			irow = U_SUB(i);
			x[irow] -= x[fsupc] * Uval[i];
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    ftcs1 = _cptofcd("U", strlen("U"));
		    ftcs2 = _cptofcd("N", strlen("N"));
		    STRSV(ftcs1, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx);
#else
		    dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx, 1, 1, 1);
#endif
#else		
		    dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif		

		    /* Apply the entries of U stored outside the
		       diagonal block, column by column. */
		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		        solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
				i++) {
			    irow = U_SUB(i);
			    x[irow] -= x[jcol] * Uval[i];
		    	}
                    }
		}
	    } /* for k ... */
	    
	}
    } else { /* Form x := inv(A')*x */
	
	if ( lower ) {
	    /* Form x := inv(L')*x : sweep the supernodes backward. */
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_START(fsupc+1) - istart;
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 2 * (nsupr - nsupc) * nsupc;

		/* Gather the contribution of the rows below the diagonal
		   block into each column of the supernode. */
		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
			x[jcol] -= x[irow] * Lval[i];
			iptr++;
		    }
		}
		
		/* A single column has a unit diagonal: nothing to solve. */
		if ( nsupc > 1 ) {
		    solve_ops += nsupc * (nsupc - 1);
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
		    dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx, 1, 1, 1);
#endif
#else
		    dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := inv(U')*x : sweep the supernodes forward. */
	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
			x[jcol] -= x[irow] * Uval[i];
		    }
		}

		solve_ops += nsupc * (nsupc + 1);

		if ( nsupc == 1 ) {
		    x[fsupc] /= Lval[luptr];
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
		    dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx, 1, 1, 1);
#endif
#else
		    dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#endif
		}
	    } /* for k ... */
	}
    }

    /* solve_ops is accumulated but currently not reported anywhere. */
    /*SuperLUStat.ops[SOLVE] += solve_ops;*/
    SUPERLU_FREE(work);
    return 0;
}


/*! \brief

  Purpose   
    =======   

    sp_dgemv_dist()  performs one of the matrix-vector operations   
       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
    where alpha and beta are scalars, x and y are vectors and A is a
    sparse A->nrow by A->ncol matrix.   

    Parameters   
    ==========   

    TRANS  - (input) char*
             On entry, TRANS specifies the operation to be performed as   
             follows:   
                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   

    ALPHA  - (input) double
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
             Currently, the type of A can be:
                 Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. 
             In the future, more general A can be handled.

    X      - (input) double*, array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   

    INCX   - (input) int
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   

    BETA   - (input) double
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   

    Y      - (input/output) double*,  array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the 
             updated vector y.
	     
    INCY   - (input) int
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   

    ==== Sparse Level 2 Blas routine.   
*/
int
sp_dgemv_dist(char *trans, double alpha, SuperMatrix *A, double *x, 
	      int incx, double beta, double *y, int incy)
{
    /*
     * Implementation notes
     * --------------------
     * - Only the first character of trans is examined, in either case;
     *   'T' and 'C' select the same (transposed) operation.
     * - On an illegal argument xerr_dist() is called with the argument
     *   position and the function returns 0 without touching y.
     * - Only incy == 1 is implemented for the non-transposed product and
     *   only incx == 1 for the transposed product; other strides ABORT.
     * - The function always returns 0.
     */

    /* Local variables */
    NCformat *Astore;
    double   *Aval;
    int info;
    double temp;
    int lenx, leny, i, j, irow;
    int iy, jx, jy, kx, ky;
    int notran;

    notran = ( trans[0] == 'N' || trans[0] == 'n' );
    Astore = (NCformat *) A->Store;
    Aval = (double *) Astore->nzval;
    
    /* Test the input parameters */
    info = 0;
    if ( !notran && trans[0] != 'T' && trans[0] != 't'
	 && trans[0] != 'C' && trans[0] != 'c' ) info = 1;
    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
    else if (incx == 0) info = 5;
    else if (incy == 0)	info = 8;
    if (info != 0) {
	xerr_dist("sp_dgemv_dist ", &info);
	return 0;
    }

    /* Quick return if possible. */
    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
	return 0;

    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
       up the start points in  X  and  Y. */
    if ( notran ) {
	lenx = A->ncol;
	leny = A->nrow;
    } else {
	lenx = A->nrow;
	leny = A->ncol;
    }
    /* With a negative increment the vector is traversed backwards, so
       the start index is its last stored element. */
    if (incx > 0) kx = 0;
    else kx =  - (lenx - 1) * incx;
    if (incy > 0) ky = 0;
    else ky =  - (leny - 1) * incy;

    /* Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A. */

    /* First form  y := beta*y. */
    if (beta != 1.) {
	if (incy == 1) {
	    if (beta == 0.)
		for (i = 0; i < leny; ++i) y[i] = 0.;
	    else
		for (i = 0; i < leny; ++i) y[i] = beta * y[i];
	} else {
	    iy = ky;
	    if (beta == 0.)
		for (i = 0; i < leny; ++i) {
		    y[iy] = 0.;
		    iy += incy;
		}
	    else
		for (i = 0; i < leny; ++i) {
		    y[iy] = beta * y[iy];
		    iy += incy;
		}
	}
    }
    
    if (alpha == 0.) return 0;

    if ( notran ) {
	/* Form  y := alpha*A*x + y. */
	jx = kx;
	if (incy == 1) {
	    for (j = 0; j < A->ncol; ++j) {
		/* A zero x entry contributes nothing: skip the column. */
		if (x[jx] != 0.) {
		    temp = alpha * x[jx];
		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
			irow = Astore->rowind[i];
			y[irow] += temp * Aval[i];
		    }
		}
		jx += incx;
	    }
	} else {
	    ABORT("Not implemented.");
	}
    } else {
	/* Form  y := alpha*A'*x + y. */
	jy = ky;
	if (incx == 1) {
	    for (j = 0; j < A->ncol; ++j) {
		/* Dot product of column j of A with x. */
		temp = 0.;
		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
		    irow = Astore->rowind[i];
		    temp += Aval[i] * x[irow];
		}
		y[jy] += alpha * temp;
		jy += incy;
	    }
	} else {
	    ABORT("Not implemented.");
	}
    }
    return 0;
} /* sp_dgemv_dist */
SuperLU_DIST_5.3.0/SRC/Cnames.h0000644013363400111340000003360713233431301014653 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Macro definitions * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #ifndef __SUPERLU_CNAMES /* allow multiple inclusions */ #define __SUPERLU_CNAMES /* * These macros define how C routines will be called. ADD_ assumes that * they will be called by fortran, which expects C routines to have an * underscore postfixed to the name (Suns, and the Intel expect this). * NOCHANGE indicates that fortran will be calling, and that it expects * the name called by fortran to be identical to that compiled by the C * (RS6K's do this). UPCASE says it expects C routines called by fortran * to be in all upcase (CRAY wants this). */ #define ADD_ 0 #define NOCHANGE 1 #define UPCASE 2 #define C_CALL 3 #ifdef UpCase #define F77_CALL_C UPCASE #endif #ifdef NoChange #define F77_CALL_C NOCHANGE #endif #ifdef Add_ #define F77_CALL_C ADD_ #endif #ifndef F77_CALL_C #define F77_CALL_C ADD_ #endif #if (F77_CALL_C == ADD_) /* * These defines set up the naming scheme required to have a fortran 77 * routine call a C routine * No redefinition necessary to have following Fortran to C interface: * FORTRAN CALL C DECLARATION * call dgemm(...) void dgemm_(...) * * This is the default. 
*/ /* These are the functions defined in F90 wraper */ #define f_create_gridinfo_handle f_create_gridinfo_handle_ #define f_create_options_handle f_create_options_handle_ #define f_create_ScalePerm_handle f_create_scaleperm_handle_ #define f_create_LUstruct_handle f_create_lustruct_handle_ #define f_create_SOLVEstruct_handle f_create_solvestruct_handle_ #define f_create_SuperMatrix_handle f_create_supermatrix_handle_ #define f_destroy_gridinfo_handle f_destroy_gridinfo_handle_ #define f_destroy_options_handle f_destroy_options_handle_ #define f_destroy_ScalePerm_handle f_destroy_scaleperm_handle_ #define f_destroy_LUstruct_handle f_destroy_lustruct_handle_ #define f_destroy_SOLVEstruct_handle f_destroy_solvestruct_handle_ #define f_destroy_SuperMatrix_handle f_destroy_supermatrix_handle_ #define f_create_SuperLUStat_handle f_create_superlustat_handle_ #define f_destroy_SuperLUStat_handle f_destroy_superlustat_handle_ #define f_get_gridinfo f_get_gridinfo_ #define f_get_SuperMatrix f_get_supermatrix_ #define f_set_SuperMatrix f_set_supermatrix_ #define f_get_CompRowLoc_Matrix f_get_comprowloc_matrix_ #define f_set_CompRowLoc_Matrix f_set_comprowloc_matrix_ #define f_get_superlu_options f_get_superlu_options_ #define f_set_superlu_options f_set_superlu_options_ #define f_set_default_options f_set_default_options_ #define f_superlu_gridinit f_superlu_gridinit_ #define f_superlu_gridmap f_superlu_gridmap_ #define f_superlu_gridexit f_superlu_gridexit_ #define f_ScalePermstructInit f_scalepermstructinit_ #define f_ScalePermstructFree f_scalepermstructfree_ #define f_PStatInit f_pstatinit_ #define f_PStatFree f_pstatfree_ #define f_LUstructInit f_lustructinit_ #define f_LUstructFree f_lustructfree_ #define f_Destroy_LU f_destroy_lu_ #define f_dCreate_CompRowLoc_Mat_dist f_dcreate_comprowloc_mat_dist_ #define f_zCreate_CompRowLoc_Mat_dist f_zcreate_comprowloc_mat_dist_ #define f_Destroy_CompRowLoc_Mat_dist f_destroy_comprowloc_mat_dist_ #define 
f_Destroy_SuperMat_Store_dist f_destroy_supermat_store_dist_ #define f_dSolveFinalize f_dsolvefinalize_ #define f_zSolveFinalize f_zsolvefinalize_ #define f_pdgssvx f_pdgssvx_ #define f_pzgssvx f_pzgssvx_ #define f_dcreate_dist_matrix f_dcreate_dist_matrix_ #define f_zcreate_dist_matrix f_zcreate_dist_matrix_ #define f_check_malloc f_check_malloc_ #endif #if (F77_CALL_C == UPCASE) /* * These defines set up the naming scheme required to have a fortran 77 * routine call a C routine * following Fortran to C interface: * FORTRAN CALL C DECLARATION * call dgemm(...) void DGEMM(...) */ /* BLAS */ #define sasum_ SASUM #define isamax_ ISAMAX #define scopy_ SCOPY #define sscal_ SSCAL #define sger_ SGER #define snrm2_ SNRM2 #define ssymv_ SSYMV #define sdot_ SDOT #define saxpy_ SAXPY #define ssyr2_ SSYR2 #define srot_ SROT #define sgemv_ SGEMV #define strsv_ STRSV #define sgemm_ SGEMM #define strsm_ STRSM #define dasum_ DASUM #define idamax_ IDAMAX #define dcopy_ DCOPY #define dscal_ DSCAL #define dger_ DGER #define dnrm2_ DNRM2 #define dsymv_ DSYMV #define ddot_ DDOT #define daxpy_ DAXPY #define dsyr2_ DSYR2 #define drot_ DROT #define dgemv_ DGEMV #define dtrsv_ DTRSV #define dgemm_ DGEMM #define dtrsm_ DTRSM #define scasum_ SCASUM #define icamax_ ICAMAX #define ccopy_ CCOPY #define cscal_ CSCAL #define scnrm2_ SCNRM2 #define caxpy_ CAXPY #define cgemv_ CGEMV #define ctrsv_ CTRSV #define cgemm_ CGEMM #define ctrsm_ CTRSM #define cgerc_ CGERC #define chemv_ CHEMV #define cher2_ CHER2 #define dzasum_ DZASUM #define izamax_ IZAMAX #define zcopy_ ZCOPY #define zscal_ ZSCAL #define dznrm2_ DZNRM2 #define zaxpy_ ZAXPY #define zgemv_ ZGEMV #define ztrsv_ ZTRSV #define zgemm_ ZGEMM #define ztrsm_ ZTRSM #define zgerc_ ZGERC #define zhemv_ ZHEMV #define zher2_ ZHER2 #define zgeru_ ZGERU /* #define mc64id_dist MC64ID_DIST #define mc64ad_dist MC64AD_DIST */ #define c_bridge_dgssv_ C_BRIDGE_DGSSV #define c_fortran_slugrid_ C_FORTRAN_SLUGRID #define c_fortran_pdgssvx_ C_FORTRAN_PDGSSVX 
#define c_fortran_pdgssvx_ABglobal_ C_FORTRAN_PDGSSVX_ABGLOBAL #define c_fortran_pzgssvx_ C_FORTRAN_PZGSSVX #define c_fortran_pzgssvx_ABglobal_ C_FORTRAN_PZGSSVX_ABGLOBAL /* These are the functions defined in F90 wraper */ #define f_create_gridinfo_handle F_CREATE_GRIDINFO_HANDLE #define f_create_options_handle F_CREATE_OPTIONS_HANDLE #define f_create_ScalePerm_handle F_CREATE_SCALEPERM_HANDLE #define f_create_LUstruct_handle F_CREATE_LUSTRUCT_HANDLE #define f_create_SOLVEstruct_handle F_CREATE_SOLVESTRUCT_HANDLE #define f_create_SuperMatrix_handle F_CREATE_SUPERMATRIX_HANDLE #define f_destroy_gridinfo_handle F_DESTROY_GRIDINFO_HANDLE #define f_destroy_options_handle F_DESTROY_OPTIONS_HANDLE #define f_destroy_ScalePerm_handle F_DESTROY_SCALEPERM_HANDLE #define f_destroy_LUstruct_handle F_DESTROY_LUSTRUCT_HANDLE #define f_destroy_SOLVEstruct_handle F_DESTROY_SOLVESTRUCT_HANDLE #define f_destroy_SuperMatrix_handle F_DESTROY_SUPERMATRIX_HANDLE #define f_create_SuperLUStat_handle F_CREATE_SUPERLUSTAT_HANDLE #define f_destroy_SuperLUStat_handle F_DESTROY_SUPERLUSTAT_HANDLE #define f_get_gridinfo F_GET_GRIDINFO #define f_get_SuperMatrix F_GET_SUPERMATRIX #define f_set_SuperMatrix F_SET_SUPERMATRIX #define f_get_CompRowLoc_Matrix F_GET_COMPROWLOC_MATRIX #define f_set_CompRowLoc_Matrix F_SET_COMPROWLOC_MATRIX #define f_get_superlu_options F_GET_SUPERLU_OPTIONS #define f_set_superlu_options F_SET_SUPERLU_OPTIONS #define f_set_default_options F_SET_DEFAULT_OPTIONS #define f_superlu_gridinit F_SUPERLU_GRIDINIT #define f_superlu_gridmap F_SUPERLU_GRIDMAP #define f_superlu_gridexit F_SUPERLU_GRIDEXIT #define f_ScalePermstructInit F_SCALEPERMSTRUCTINIT #define f_ScalePermstructFree F_SCALEPERMSTRUCTFREE #define f_PStatInit F_PSTATINIT #define f_PStatFree F_PSTATFREE #define f_LUstructInit F_LUSTRUCTINIT #define f_LUstructFree F_LUSTRUCTFREE #define f_Destroy_LU F_DESTROY_LU #define f_dCreate_CompRowLoc_Mat_dist F_DCREATE_COMPROWLOC_MAT_DIST #define f_zCreate_CompRowLoc_Mat_dist 
F_ZCREATE_COMPROWLOC_MAT_DIST #define f_Destroy_CompRowLoc_Mat_dist F_DESTROY_COMPROWLOC_MAT_DIST #define f_Destroy_SuperMat_Store_dist F_DESTROY_SUPERMAT_STORE_DIST #define f_dSolveFinalize F_DSOLVEFINALIZE #define f_zSolveFinalize F_ZSOLVEFINALIZE #define f_pdgssvx F_PDGSSVX #define f_pzgssvx F_PZGSSVX #define f_dcreate_dist_matrix F_DCREATE_DIST_MATRIX #define f_zcreate_dist_matrix F_ZCREATE_DIST_MATRIX #define f_check_malloc F_CHECK_MALLOC #endif #if (F77_CALL_C == NOCHANGE) /* * These defines set up the naming scheme required to have a fortran 77 * routine call a C routine * for following Fortran to C interface: * FORTRAN CALL C DECLARATION * call dgemm(...) void dgemm(...) */ /* BLAS */ #define sasum_ sasum #define isamax_ isamax #define scopy_ scopy #define sscal_ sscal #define sger_ sger #define snrm2_ snrm2 #define ssymv_ ssymv #define sdot_ sdot #define saxpy_ saxpy #define ssyr2_ ssyr2 #define srot_ srot #define sgemv_ sgemv #define strsv_ strsv #define sgemm_ sgemm #define strsm_ strsm #define dasum_ dasum #define idamax_ idamax #define dcopy_ dcopy #define dscal_ dscal #define dger_ dger #define dnrm2_ dnrm2 #define dsymv_ dsymv #define ddot_ ddot #define daxpy_ daxpy #define dsyr2_ dsyr2 #define drot_ drot #define dgemv_ dgemv #define dtrsv_ dtrsv #define dgemm_ dgemm #define dtrsm_ dtrsm #define scasum_ scasum #define icamax_ icamax #define ccopy_ ccopy #define cscal_ cscal #define scnrm2_ scnrm2 #define caxpy_ caxpy #define cgemv_ cgemv #define ctrsv_ ctrsv #define cgemm_ cgemm #define ctrsm_ ctrsm #define cgerc_ cgerc #define chemv_ chemv #define cher2_ cher2 #define dzasum_ dzasum #define izamax_ izamax #define zcopy_ zcopy #define zscal_ zscal #define dznrm2_ dznrm2 #define zaxpy_ zaxpy #define zgemv_ zgemv #define ztrsv_ ztrsv #define zgemm_ zgemm #define ztrsm_ ztrsm #define zgerc_ zgerc #define zhemv_ zhemv #define zher2_ zher2 #define zgeru_ zgeru /* #define mc64id_dist mc64id_dist #define mc64ad_dist mc64ad_dist */ #define c_bridge_dgssv_ 
c_bridge_dgssv #define c_fortran_slugrid_ c_fortran_slugrid #define c_fortran_pdgssvx_ c_fortran_pdgssvx #define c_fortran_pdgssvx_ABglobal_ c_fortran_pdgssvx_abglobal #define c_fortran_pzgssvx_ c_fortran_pzgssvx #define c_fortran_pzgssvx_ABglobal_ c_fortran_pzgssvx_abglobal /* These are the functions defined in F90 wraper */ #define f_create_gridinfo_handle f_create_gridinfo_handle #define f_create_options_handle f_create_options_handle #define f_create_ScalePerm_handle f_create_scaleperm_handle #define f_create_LUstruct_handle f_create_lustruct_handle #define f_create_SOLVEstruct_handle f_create_solvestruct_handle #define f_create_SuperMatrix_handle f_create_supermatrix_handle #define f_destroy_gridinfo_handle f_destroy_gridinfo_handle #define f_destroy_options_handle f_destroy_options_handle #define f_destroy_ScalePerm_handle f_destroy_scaleperm_handle #define f_destroy_LUstruct_handle f_destroy_lustruct_handle #define f_destroy_SOLVEstruct_handle f_destroy_solvestruct_handle #define f_destroy_SuperMatrix_handle f_destroy_supermatrix_handle #define f_create_SuperLUStat_handle f_create_superlustat_handle #define f_destroy_SuperLUStat_handle f_destroy_superlustat_handle #define f_get_gridinfo f_get_gridinfo #define f_get_SuperMatrix f_get_supermatrix #define f_set_SuperMatrix f_set_supermatrix #define f_get_CompRowLoc_Matrix f_get_comprowloc_matrix #define f_set_CompRowLoc_Matrix f_set_comprowloc_matrix #define f_get_superlu_options f_get_superlu_options #define f_set_superlu_options f_set_superlu_options #define f_set_default_options f_set_default_options #define f_superlu_gridinit f_superlu_gridinit #define f_superlu_gridmap f_superlu_gridmap #define f_superlu_gridexit f_superlu_gridexit #define f_ScalePermstructInit f_scalepermstructinit #define f_ScalePermstructFree f_scalepermstructfree #define f_PStatInit f_pstatinit #define f_PStatFree f_pstatfree #define f_LUstructInit f_lustructinit #define f_LUstructFree f_lustructfree #define f_Destroy_LU f_destroy_lu 
#define f_dCreate_CompRowLoc_Mat_dist f_dcreate_comprowloc_mat_dist #define f_Destroy_CompRowLoc_Mat_dist f_destroy_comprowloc_mat_dist #define f_Destroy_SuperMat_Store_dist f_destroy_supermat_store_dist #define f_dSolveFinalize f_dsolvefinalize #define f_zSolveFinalize f_zsolvefinalize #define f_pdgssvx f_pdgssvx #define f_pzgssvx f_pzgssvx #define f_dcreate_dist_matrix f_dcreate_dist_matrix #define f_zcreate_dist_matrix f_zcreate_dist_matrix #define f_check_malloc f_check_malloc #endif #endif /* __SUPERLU_CNAMES */ SuperLU_DIST_5.3.0/SRC/pzgsmv.c0000644013363400111340000003243013233431301014757 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Parallel sparse matrix-vector multiplication * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_zdefs.h" void pzgsmv_init ( SuperMatrix *A, /* Matrix A permuted by columns (input/output). The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_Z; Mtype = SLU_GE. */ int_t *row_to_proc, /* Input. Mapping between rows and processes. */ gridinfo_t *grid, /* Input */ pzgsmv_comm_t *gsmv_comm /* Output. The data structure for communication. */ ) { NRformat_loc *Astore; int iam, p, procs; int *SendCounts, *RecvCounts; int_t i, j, k, l, m, m_loc, n, fst_row, jcol; int_t TotalIndSend, TotalValSend; int_t *colind, *rowptr; int_t *ind_tosend = NULL, *ind_torecv = NULL; int_t *ptr_ind_tosend, *ptr_ind_torecv; int_t *extern_start, *spa, *itemp; doublecomplex *nzval, *val_tosend = NULL, *val_torecv = NULL, t; MPI_Request *send_req, *recv_req; MPI_Status status; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzgsmv_init()"); #endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ iam = grid->iam; procs = grid->nprow * grid->npcol; Astore = (NRformat_loc *) A->Store; m = A->nrow; n = A->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; colind = Astore->colind; rowptr = Astore->rowptr; nzval = Astore->nzval; if ( !(SendCounts = SUPERLU_MALLOC(2*procs * sizeof(int))) ) ABORT("Malloc fails for SendCounts[]"); /*for (i = 0; i < 2*procs; ++i) SendCounts[i] = 0;*/ RecvCounts = SendCounts + procs; if ( !(ptr_ind_tosend = intMalloc_dist(2*(procs+1))) ) ABORT("Malloc fails for ptr_ind_tosend[]"); ptr_ind_torecv = ptr_ind_tosend + procs + 1; if ( !(extern_start = intMalloc_dist(m_loc)) ) ABORT("Malloc fails for extern_start[]"); for (i = 0; i < m_loc; ++i) extern_start[i] = rowptr[i]; /* ------------------------------------------------------------ COUNT THE NUMBER OF X ENTRIES TO BE SENT TO EACH PROCESS. THIS IS THE UNION OF THE COLUMN INDICES OF MY ROWS. SWAP TO THE BEGINNING THE PART OF A CORRESPONDING TO THE LOCAL PART OF X. 
THIS ACCOUNTS FOR THE FIRST PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ if ( !(spa = intCalloc_dist(n)) ) /* Aid in global to local translation */ ABORT("Malloc fails for spa[]"); for (p = 0; p < procs; ++p) SendCounts[p] = 0; for (i = 0; i < m_loc; ++i) { /* Loop through each row */ k = extern_start[i]; for (j = rowptr[i]; j < rowptr[i+1]; ++j) {/* Each nonzero in row i */ jcol = colind[j]; p = row_to_proc[jcol]; if ( p != iam ) { /* External */ if ( spa[jcol] == 0 ) { /* First time see this index */ ++SendCounts[p]; spa[jcol] = 1; } } else { /* Swap to beginning the part of A corresponding to the local part of X */ l = colind[k]; t = nzval[k]; colind[k] = jcol; nzval[k] = nzval[j]; colind[j] = l; nzval[j] = t; ++k; } } extern_start[i] = k; } /* ------------------------------------------------------------ LOAD THE X-INDICES TO BE SENT TO THE OTHER PROCESSES. THIS ACCOUNTS FOR THE SECOND PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ /* Build pointers to ind_tosend[]. */ ptr_ind_tosend[0] = 0; for (p = 0, TotalIndSend = 0; p < procs; ++p) { TotalIndSend += SendCounts[p]; /* Total to send. */ ptr_ind_tosend[p+1] = ptr_ind_tosend[p] + SendCounts[p]; } #if 0 ptr_ind_tosend[iam] = 0; /* Local part of X */ #endif if ( TotalIndSend ) { if ( !(ind_tosend = intMalloc_dist(TotalIndSend)) ) ABORT("Malloc fails for ind_tosend[]"); /* Exclude local part of X */ } /* Build SPA to aid global to local translation. 
*/ for (i = 0; i < n; ++i) spa[i] = EMPTY; for (i = 0; i < m_loc; ++i) { /* Loop through each row of A */ for (j = rowptr[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; if ( spa[jcol] == EMPTY ) { /* First time see this index */ p = row_to_proc[jcol]; if ( p == iam ) { /* Local */ /*assert(jcol>=fst_row);*/ spa[jcol] = jcol - fst_row; /* Relative position in local X */ } else { /* External */ ind_tosend[ptr_ind_tosend[p]] = jcol; /* Still global */ spa[jcol] = ptr_ind_tosend[p]; /* Position in ind_tosend[] */ ++ptr_ind_tosend[p]; } } } } /* ------------------------------------------------------------ TRANSFORM THE COLUMN INDICES OF MATRIX A INTO LOCAL INDICES. THIS ACCOUNTS FOR THE THIRD PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ for (i = 0; i < m_loc; ++i) { for (j = rowptr[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; colind[j] = spa[jcol]; } } /* ------------------------------------------------------------ COMMUNICATE THE EXTERNAL INDICES OF X. ------------------------------------------------------------*/ MPI_Alltoall(SendCounts, 1, MPI_INT, RecvCounts, 1, MPI_INT, grid->comm); /* Build pointers to ind_torecv[]. */ ptr_ind_torecv[0] = 0; for (p = 0, TotalValSend = 0; p < procs; ++p) { TotalValSend += RecvCounts[p]; /* Total to receive. 
*/ ptr_ind_torecv[p+1] = ptr_ind_torecv[p] + RecvCounts[p]; } if ( TotalValSend ) { if ( !(ind_torecv = intMalloc_dist(TotalValSend)) ) ABORT("Malloc fails for ind_torecv[]"); } if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*procs *sizeof(MPI_Request)))) ABORT("Malloc fails for recv_req[]."); recv_req = send_req + procs; for (p = 0; p < procs; ++p) { ptr_ind_tosend[p] -= SendCounts[p]; /* Reset pointer to beginning */ if ( SendCounts[p] ) { MPI_Isend(&ind_tosend[ptr_ind_tosend[p]], SendCounts[p], mpi_int_t, p, iam, grid->comm, &send_req[p]); } if ( RecvCounts[p] ) { MPI_Irecv(&ind_torecv[ptr_ind_torecv[p]], RecvCounts[p], mpi_int_t, p, p, grid->comm, &recv_req[p]); } } for (p = 0; p < procs; ++p) { if ( SendCounts[p] ) MPI_Wait(&send_req[p], &status); if ( RecvCounts[p] ) MPI_Wait(&recv_req[p], &status); } /* Allocate storage for the X values to to transferred. */ if ( TotalIndSend && !(val_torecv = doublecomplexMalloc_dist(TotalIndSend)) ) ABORT("Malloc fails for val_torecv[]."); if ( TotalValSend && !(val_tosend = doublecomplexMalloc_dist(TotalValSend)) ) ABORT("Malloc fails for val_tosend[]."); gsmv_comm->extern_start = extern_start; gsmv_comm->ind_tosend = ind_tosend; gsmv_comm->ind_torecv = ind_torecv; gsmv_comm->ptr_ind_tosend = ptr_ind_tosend; gsmv_comm->ptr_ind_torecv = ptr_ind_torecv; gsmv_comm->SendCounts = SendCounts; gsmv_comm->RecvCounts = RecvCounts; gsmv_comm->val_tosend = val_tosend; gsmv_comm->val_torecv = val_torecv; gsmv_comm->TotalIndSend = TotalIndSend; gsmv_comm->TotalValSend = TotalValSend; SUPERLU_FREE(spa); SUPERLU_FREE(send_req); #if ( DEBUGlevel>=2 ) PrintInt10("pzgsmv_init::rowptr", m_loc+1, rowptr); PrintInt10("pzgsmv_init::extern_start", m_loc, extern_start); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgsmv_init()"); #endif } /* PZGSMV_INIT */ /* * Performs sparse matrix-vector multiplication. */ void pzgsmv ( int_t abs, /* Input. Do abs(A)*abs(x). */ SuperMatrix *A_internal, /* Input. Matrix A permuted by columns. 
The column indices are translated into the relative positions in the gathered x-vector. The type of A can be: Stype = NR_loc; Dtype = SLU_Z; Mtype = GE. */ gridinfo_t *grid, /* Input */ pzgsmv_comm_t *gsmv_comm, /* Input. The data structure for communication. */ doublecomplex x[], /* Input. The distributed source vector */ doublecomplex ax[] /* Output. The distributed destination vector */ ) { NRformat_loc *Astore; int iam, procs; int_t i, j, p, m, m_loc, n, fst_row, jcol; int_t *colind, *rowptr; int *SendCounts, *RecvCounts; int_t *ind_tosend, *ind_torecv, *ptr_ind_tosend, *ptr_ind_torecv; int_t *extern_start, TotalValSend; doublecomplex *nzval, *val_tosend, *val_torecv; doublecomplex zero = {0.0, 0.0}, temp; double *ax_abs = (double *) ax; MPI_Request *send_req, *recv_req; MPI_Status status; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzgsmv()"); #endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ iam = grid->iam; procs = grid->nprow * grid->npcol; Astore = (NRformat_loc *) A_internal->Store; m = A_internal->nrow; n = A_internal->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; colind = Astore->colind; rowptr = Astore->rowptr; nzval = (doublecomplex *) Astore->nzval; extern_start = gsmv_comm->extern_start; ind_torecv = gsmv_comm->ind_torecv; ptr_ind_tosend = gsmv_comm->ptr_ind_tosend; ptr_ind_torecv = gsmv_comm->ptr_ind_torecv; SendCounts = gsmv_comm->SendCounts; RecvCounts = gsmv_comm->RecvCounts; val_tosend = (doublecomplex *) gsmv_comm->val_tosend; val_torecv = (doublecomplex *) gsmv_comm->val_torecv; TotalValSend = gsmv_comm->TotalValSend; /* ------------------------------------------------------------ COPY THE X VALUES INTO THE SEND BUFFER. 
------------------------------------------------------------*/ for (i = 0; i < TotalValSend; ++i) { j = ind_torecv[i] - fst_row; /* Relative index in x[] */ val_tosend[i] = x[j]; } /* ------------------------------------------------------------ COMMUNICATE THE X VALUES. ------------------------------------------------------------*/ if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*procs *sizeof(MPI_Request)))) ABORT("Malloc fails for recv_req[]."); recv_req = send_req + procs; for (p = 0; p < procs; ++p) { if ( RecvCounts[p] ) { MPI_Isend(&val_tosend[ptr_ind_torecv[p]], RecvCounts[p], SuperLU_MPI_DOUBLE_COMPLEX, p, iam, grid->comm, &send_req[p]); } if ( SendCounts[p] ) { MPI_Irecv(&val_torecv[ptr_ind_tosend[p]], SendCounts[p], SuperLU_MPI_DOUBLE_COMPLEX, p, p, grid->comm, &recv_req[p]); } } /* ------------------------------------------------------------ PERFORM THE ACTUAL MULTIPLICATION. ------------------------------------------------------------*/ if ( abs ) { /* Perform abs(A)*abs(x) */ /* Multiply the local part. */ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ ax_abs[i] = 0.0; for (j = rowptr[i]; j < extern_start[i]; ++j) { jcol = colind[j]; ax_abs[i] += slud_z_abs1(&nzval[j]) * slud_z_abs1(&x[jcol]); } } for (p = 0; p < procs; ++p) { if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status); if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status); } /* Multiply the external part. */ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ for (j = extern_start[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; ax_abs[i] += slud_z_abs1(&nzval[j]) * slud_z_abs(&val_torecv[jcol]); } } } else { /* Multiply the local part. 
*/ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ ax[i] = zero; for (j = rowptr[i]; j < extern_start[i]; ++j) { jcol = colind[j]; zz_mult(&temp, &nzval[j], &x[jcol]); z_add(&ax[i], &ax[i], &temp); } } for (p = 0; p < procs; ++p) { if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status); if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status); } /* Multiply the external part. */ for (i = 0; i < m_loc; ++i) { /* Loop through each row */ for (j = extern_start[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; zz_mult(&temp, &nzval[j], &val_torecv[jcol]); z_add(&ax[i], &ax[i], &temp); } } } SUPERLU_FREE(send_req); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgsmv()"); #endif } /* PZGSMV */ void pzgsmv_finalize(pzgsmv_comm_t *gsmv_comm) { int_t *it; doublecomplex *dt; SUPERLU_FREE(gsmv_comm->extern_start); if ( it = gsmv_comm->ind_tosend ) SUPERLU_FREE(it); if ( it = gsmv_comm->ind_torecv ) SUPERLU_FREE(it); SUPERLU_FREE(gsmv_comm->ptr_ind_tosend); SUPERLU_FREE(gsmv_comm->SendCounts); if ( dt = gsmv_comm->val_tosend ) SUPERLU_FREE(dt); if ( dt = gsmv_comm->val_torecv ) SUPERLU_FREE(dt); } SuperLU_DIST_5.3.0/SRC/zlook_ahead_update.c0000644013363400111340000002226613233431301017261 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /************************************************************************/ /*! @file * \brief Look-ahead update of the Schur complement. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 * Modified: September 18, 2017
 *   
 */

/*
 * Look-ahead update fragment.
 *
 * This fragment has no function header of its own: every variable that is
 * not declared below (j, jj0, nub, k0, num_look_aheads, usub, uval, lsub,
 * lusup, bigU, bigV, indirect, indirect2, ...) must already be in scope at
 * the point where the fragment is placed.
 *
 * For each block U(k,j) whose position in the permuted order falls inside
 * the look-ahead window, the loop below
 *   1. gathers the block into the dense buffer bigU, padding each nonzero
 *      segment with leading zeros up to the common height ldu;
 *   2. multiplies every block of L(:,k) by that buffer with zgemm_ and
 *      scatters the product into the destination L or U block;
 *   3. if the destination column is owned by this process column and k0 was
 *      its last outstanding dependency, factorizes the panel with PZGSTRF2
 *      and posts non-blocking sends of L(:,kk) along the process row.
 */
iukp = iukp0; /* point to the first block in index[] */
rukp = rukp0; /* point to the start of nzval[] */

/* The window test reads the permutation either from iperm_u[] (ISORT)
   or from every second entry of perm_u[]. */
#ifdef ISORT
while (j < nub && iperm_u[j] <= k0 + num_look_aheads)
#else
while (j < nub && perm_u[2 * j] <= k0 + num_look_aheads)
#endif
{
    doublecomplex zero = {0.0, 0.0};

#if 0 // Sherry: no need to search
    /* Caveat: There is a permutation perm_u involved for j  */
    /* Search along the row for the pointers {iukp, rukp} pointing to
     * block U(k,j).
     * j    -- current block in look-ahead window, initialized to 0 on entry
     * iukp -- point to the start of index[] metadata
     * rukp -- point to the start of nzval[] array
     * jb   -- block number of block U(k,j), update destination column
     */
    arrive_at_ublock(
		     j, &iukp, &rukp, &jb, &ljb, &nsupc,
         	     iukp0, rukp0, usub, perm_u, xsup, grid
		    );
#else
    /* Blocks are visited in storage order, so the descriptor of the
       current block is simply at usub[iukp]. */
    jb = usub[iukp];
    ljb = LBj (jb, grid);     /* Local block number of U(k,j). */
    nsupc = SuperSize(jb);
    iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */
#endif

    j++;
    jj0++;
    jj = iukp;

    /* A column whose first-nonzero index equals klst has an empty segment. */
    while (usub[jj] == klst) ++jj; /* Skip zero segments */

    ldu = klst - usub[jj++];  /* height of the first nonzero segment */
    ncols = 1;

    /* This loop computes ldu (the tallest segment) and ncols (the number
       of columns with a nonzero segment). */
    for (; jj < iukp + nsupc; ++jj) { /* for each column jj in block U(k,j) */
        segsize = klst - usub[jj];
        if (segsize) {
            ++ncols;
            if (segsize > ldu)  ldu = segsize;
        }
    }
#if ( DEBUGlevel>=3 )
    ++num_update;
#endif

#if ( DEBUGlevel>=3 )
    printf ("(%d) k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n",
	    iam, k, jb, ldu, ncols, nsupc);
    ++num_copy;
#endif

    /* Now copy one block U(k,j) to bigU for GEMM, padding zeros up to ldu. */
    tempu = bigU; /* Copy one block U(k,j) to bigU for GEMM */
    for (jj = iukp; jj < iukp + nsupc; ++jj) {
        segsize = klst - usub[jj];
        if (segsize) {
            /* Each packed column occupies exactly ldu entries:
               lead_zero zeros followed by the segsize stored values. */
            lead_zero = ldu - segsize;
            for (i = 0; i < lead_zero; ++i) tempu[i] = zero;
            tempu += lead_zero;
            for (i = 0; i < segsize; ++i) {
                tempu[i] = uval[rukp + i];
            }
            rukp += segsize;  /* rukp now points past this column's values */
            tempu += segsize;
        }
    }
    tempu = bigU; /* set back to the beginning of the buffer */
#if 0
    rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */
#endif

    nbrow = lsub[1]; /* number of row subscripts in L(:,k) */
    if (myrow == krow) nbrow = lsub[1] - lsub[3]; /* skip diagonal block for those rows. */
    // double ttx =SuperLU_timer_();

    int current_b = 0; /* Each thread starts searching from first block.
                          This records the moving search target.           */
    lptr = lptr0; /* point to the start of index[] in supernode L(:,k) */
    luptr = luptr0;

#ifdef _OPENMP
    /* Sherry -- examine all the shared variables ??
       'firstprivate' ensures that the private variables are initialized
       to the values before entering the loop.  */
#pragma omp parallel for \
    firstprivate(lptr,luptr,ib,current_b) private(lb) \
    default(shared) schedule(dynamic)
#endif
    for (lb = 0; lb < nlb; lb++) { /* Loop through each block in L(:,k) */
        int temp_nbrow; /* automatic variable is private */

        /* Search for the L block that my thread will work on.
           No need to search from 0, can continue at the point where
           it is left from last iteration.
           Note: Blocks may not be sorted in L. Different thread picks up
	   different lb.   */
        for (; current_b < lb; ++current_b) {
            temp_nbrow = lsub[lptr + 1];    /* Number of full rows. */
            lptr += LB_DESCRIPTOR;  /* Skip descriptor. */
            lptr += temp_nbrow;   /* move to next block */
            luptr += temp_nbrow;  /* move to next block */
        }

#ifdef _OPENMP        
        int_t thread_id = omp_get_thread_num ();
#else
        int_t thread_id = 0;
#endif
        /* Per-thread slices of the shared work buffers, selected by
           thread_id. */
        doublecomplex * tempv = bigV + ldt*ldt*thread_id;

        int *indirect_thread  = indirect + ldt * thread_id;
        int *indirect2_thread = indirect2 + ldt * thread_id;        
        ib = lsub[lptr];        /* block number of L(i,k) */
        temp_nbrow = lsub[lptr + 1];    /* Number of full rows. */
	/* assert (temp_nbrow <= nbrow); */

        lptr += LB_DESCRIPTOR;  /* Skip descriptor. */

	/*if (thread_id == 0) tt_start = SuperLU_timer_();*/

        /* calling gemm */
	/* Operation count charged to FACT: 8 per complex multiply-add,
	   temp_nbrow*ldu*ncols of them. */
	stat->ops[FACT] += 8.0 * (flops_t)temp_nbrow * ldu * ncols;
        /* tempv(temp_nbrow x ncols) = alpha * L_block(temp_nbrow x ldu)
                                       * tempu(ldu x ncols) + beta * tempv.
           The L operand starts (knsupc - ldu) columns into the panel.
           The vendor-BLAS variant passes two extra trailing arguments,
           presumably the Fortran string lengths -- confirm against the
           BLAS binding in use. */
#if defined (USE_VENDOR_BLAS)
        zgemm_("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
                   &lusup[luptr + (knsupc - ldu) * nsupr], &nsupr,
                   tempu, &ldu, &beta, tempv, &temp_nbrow, 1, 1);
#else
        zgemm_("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
                   &lusup[luptr + (knsupc - ldu) * nsupr], &nsupr,
                   tempu, &ldu, &beta, tempv, &temp_nbrow );
#endif

#if 0
	if (thread_id == 0) {
	    tt_end = SuperLU_timer_();
	    LookAheadGEMMTimer += tt_end - tt_start;
	    tt_start = tt_end;
	} 
#endif
        /* Now scattering the output. */
        if (ib < jb) {    /* A(i,j) is in U. */
            zscatter_u (ib, jb,
                       nsupc, iukp, xsup,
                       klst, temp_nbrow,
                       lptr, temp_nbrow, lsub,
                       usub, tempv, Ufstnz_br_ptr, Unzval_br_ptr, grid);
        } else {          /* A(i,j) is in L. */
            zscatter_l (ib, ljb, nsupc, iukp, xsup, klst, temp_nbrow, lptr,
                       temp_nbrow, usub, lsub, tempv,
                       indirect_thread, indirect2_thread, 
                       Lrowind_bc_ptr, Lnzval_bc_ptr, grid);
        }

        /* Advance this thread's private cursor past the block just
           processed, so the search at the top of the next iteration
           resumes from here. */
        ++current_b;         /* Move to next block. */
        lptr += temp_nbrow;
        luptr += temp_nbrow;

#if 0
	if (thread_id == 0) {
	    tt_end = SuperLU_timer_();
	    LookAheadScatterTimer += tt_end - tt_start;
	}
#endif
    } /* end parallel for lb = 0, nlb ... all blocks in L(:,k) */

#if 0
    rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */
#endif
    iukp += nsupc; /* Move to block U(k,j+1) */

    /* =========================================== *
     * == factorize L(:,j) and send if possible == *
     * =========================================== */
    kk = jb; /* destination column that is just updated */
    kcol = PCOL (kk, grid);
    /* j was already incremented above, hence the j - 1. */
#ifdef ISORT
    kk0 = iperm_u[j - 1];
#else
    kk0 = perm_u[2 * (j - 1)];
#endif
    look_id = kk0 % (1 + num_look_aheads);

    if (look_ahead[kk] == k0 && kcol == mycol) {
        /* current column is the last dependency */
        look_id = kk0 % (1 + num_look_aheads);

        /* Factor diagonal and subdiagonal blocks and test for exact
           singularity.  */
        factored[kk] = 0;

        double tt1 = SuperLU_timer_();

        PZGSTRF2(options, kk0, kk, thresh, Glu_persist, grid, Llu,
                  U_diag_blk_send_req, tag_ub, stat, info);

        pdgstrf2_timer += SuperLU_timer_() - tt1; 

        /* stat->time7 += SuperLU_timer_() - ttt1; */

        /* Multicasts numeric values of L(:,kk) to process rows. */
        send_req = send_reqs[look_id];
        msgcnt = msgcnts[look_id];

        lk = LBj (kk, grid);    /* Local block number. */
        lsub1 = Lrowind_bc_ptr[lk];
        lusup1 = Lnzval_bc_ptr[lk];
        if (lsub1) {
            /* msgcnt[0]: length of the index message,
               msgcnt[1]: length of the numerical-value message. */
            msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0] * LB_DESCRIPTOR;
            msgcnt[1] = lsub1[1] * SuperSize (kk);
        } else {
            /* No local L block in this column: zero-length messages. */
            msgcnt[0] = 0;
            msgcnt[1] = 0;
        }

        scp = &grid->rscp;      /* The scope of process row. */
        for (pj = 0; pj < Pc; ++pj) {
            if (ToSendR[lk][pj] != EMPTY) {
#if ( PROFlevel>=1 )
                TIC (t1);
#endif
                /* Two non-blocking sends per destination: indices use
                   request slot pj, values use slot pj + Pc. */
                MPI_Isend (lsub1, msgcnt[0], mpi_int_t, pj,
                           SLU_MPI_TAG (0, kk0) /* (4*kk0)%tag_ub */ ,
                           scp->comm, &send_req[pj]);
                MPI_Isend (lusup1, msgcnt[1], SuperLU_MPI_DOUBLE_COMPLEX, pj,
                           SLU_MPI_TAG (1, kk0) /* (4*kk0+1)%tag_ub */ ,
                           scp->comm, &send_req[pj + Pc]);
#if ( PROFlevel>=1 )
                TOC (t2, t1);
                stat->utime[COMM] += t2;
                msg_cnt += 2;
                msg_vol += msgcnt[0] * iword + msgcnt[1] * dword;
#endif
#if ( DEBUGlevel>=2 )
                printf ("[%d] -2- Send L(:,%4d): #lsub %4d, #lusup %4d to Pj %2d, tags %d:%d \n",
                        iam, kk, msgcnt[0], msgcnt[1], pj,
			SLU_MPI_TAG(0,kk0), SLU_MPI_TAG(1,kk0));
#endif
            }  /* end if ( ToSendR[lk][pj] != EMPTY ) */
        } /* end for pj ... */
    } /* end if( look_ahead[kk] == k0 && kcol == mycol ) */
} /* end while j < nub and perm_u[j] 
 * -- Distributed SuperLU routine (version 4.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Last modified:
 * December 31, 2015  version 4.3
 * 
*/ #include #include "superlu_zdefs.h" /*-- Function prototypes --*/ static void gather_1rhs_diag_to_all(int_t, doublecomplex [], Glu_persist_t *, LocalLU_t *, gridinfo_t *, int_t, int_t [], int_t [], doublecomplex [], doublecomplex []); static void redist_all_to_diag(int_t, doublecomplex [], Glu_persist_t *, LocalLU_t *, gridinfo_t *, int_t [], doublecomplex []); /*! \brief * *
 * Purpose
 * =======
 *
 * pzgsrfs_ABXglobal improves the computed solution to a system of linear   
 * equations and provides error bounds and backward error estimates
 * for the solution. 
 *
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * A      (input) SuperMatrix*
 *	  The original matrix A, or the scaled A if equilibration was done.
 *        A is also permuted into the form Pc*Pr*A*Pc', where Pr and Pc
 *        are permutation matrices. The type of A can be:
 *        Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
 *
 *        NOTE: Currently, A must reside in all processes when calling
 *              this routine.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pzgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_zdefs.h for the definition of 'LUstruct_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input) doublecomplex* (global)
 *        The N-by-NRHS right-hand side matrix of the possibly equilibrated
 *        and row permuted system.
 *       
 *        NOTE: Currently, B must reside on all processes when calling
 *              this routine.
 *
 * ldb    (input) int (global)
 *        Leading dimension of matrix B.
 *
 * X      (input/output) doublecomplex* (global)
 *        On entry, the solution matrix X, as computed by PZGSTRS.
 *        On exit, the improved solution matrix X.
 *        If DiagScale = COL or BOTH, X should be premultiplied by diag(C)
 *        in order to obtain the solution to the original system.
 *
 *        NOTE: Currently, X must reside on all processes when calling
 *              this routine.
 *
 * ldx    (input) int (global)
 *        Leading dimension of matrix X.
 *
 * nrhs   (input) int
 *        Number of right-hand sides.
 *
 * berr   (output) double*, dimension (nrhs)
 *         The componentwise relative backward error of each solution   
 *         vector X(j) (i.e., the smallest relative change in   
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the refinement steps.
 *        See util.h for the definition of SuperLUStat_t.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        
 * Internal Parameters   
 * ===================   
 *
 * ITMAX is the maximum number of steps of iterative refinement.   
 * 
*/ void pzgsrfs_ABXglobal(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, doublecomplex *B, int_t ldb, doublecomplex *X, int_t ldx, int nrhs, double *berr, SuperLUStat_t *stat, int *info) { #define ITMAX 20 Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; /* * Data structures used by matrix-vector multiply routine. */ int_t N_update; /* Number of variables updated on this process */ int_t *update; /* vector elements (global index) updated on this processor. */ int_t *bindx; doublecomplex *val; int_t *mv_sup_to_proc; /* Supernode to process mapping in matrix-vector multiply. */ /*-- end data structures for matrix-vector multiply --*/ doublecomplex *b, *ax, *R, *B_col, *temp, *work, *X_col, *x_trs, *dx_trs; double *rwork; int_t notran; int_t count, ii, j, jj, k, knsupc, lk, lwork, nprow, nsupers, nz, p; int i, iam, pkk; int_t *ilsum, *xsup; double eps, lstres; double s, safmin, safe1, safe2; /* NEW STUFF */ int_t num_diag_procs, *diag_procs; /* Record diagonal process numbers. */ int_t *diag_len; /* Length of the X vector on diagonal processes. */ /*-- Function prototypes --*/ extern void pzgstrs1(int_t, LUstruct_t *, gridinfo_t *, doublecomplex *, int, SuperLUStat_t *, int *); /* Test the input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NCP || A->Dtype != SLU_Z || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < SUPERLU_MAX(0, n) ) *info = -10; else if ( ldx < SUPERLU_MAX(0, n) ) *info = -12; else if ( nrhs < 0 ) *info = -13; if (*info != 0) { i = -(*info); pxerr_dist("pzgsrfs_ABXglobal", grid, i); return; } /* Quick return if possible. */ if ( n == 0 || nrhs == 0 ) { return; } /* Initialization. 
*/ iam = grid->iam; nprow = grid->nprow; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; ilsum = Llu->ilsum; notran = 1; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgsrfs_ABXglobal()"); #endif get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. number of diag processes = " IFMT "\n", num_diag_procs); PrintInt10("diag_procs", num_diag_procs, diag_procs); PrintInt10("diag_len", num_diag_procs, diag_len); } #endif if ( !(mv_sup_to_proc = intCalloc_dist(nsupers)) ) ABORT("Calloc fails for mv_sup_to_proc[]"); pzgsmv_AXglobal_setup(A, Glu_persist, grid, &N_update, &update, &val, &bindx, mv_sup_to_proc); i = CEILING( nsupers, nprow ); /* Number of local block rows */ ii = Llu->ldalsum + i * XK_H; k = SUPERLU_MAX(N_update, sp_ienv_dist(3)); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX( jj, diag_len[j] ); jj = SUPERLU_MAX( jj, N_update ); lwork = N_update /* For ax and R */ + ii /* For dx_trs */ + ii /* For x_trs */ + k /* For b */ + jj; /* for temp */ if ( !(work = doublecomplexMalloc_dist(lwork)) ) ABORT("Malloc fails for work[]"); ax = R = work; dx_trs = work + N_update; x_trs = dx_trs + ii; b = x_trs + ii; temp = b + k; if ( !(rwork = SUPERLU_MALLOC(N_update * sizeof(double))) ) ABORT("Malloc fails for rwork[]"); #if ( DEBUGlevel>=2 ) { doublecomplex *dwork = doublecomplexMalloc_dist(n); for (i = 0; i < n; ++i) { if ( i & 1 ) dwork[i].r = 1.; else dwork[i].r = 2.; dwork[i].i = 0.; } /* Check correctness of matrix-vector multiply. */ pzgsmv_AXglobal(N_update, update, val, bindx, dwork, ax); PrintDoublecomplex("Mult A*x", N_update, ax); SUPERLU_FREE(dwork); } #endif /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = A->ncol + 1; eps = dmach_dist("Epsilon"); safmin = dmach_dist("Safe minimum"); /* Set SAFE1 essentially to be the underflow threshold times the number of additions in each row. 
*/ safe1 = nz * safmin; safe2 = safe1 / eps; #if ( DEBUGlevel>=1 ) if ( !iam ) printf(".. eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n", eps, anorm, safe1, safe2); #endif /* Do for each right-hand side ... */ for (j = 0; j < nrhs; ++j) { count = 0; lstres = 3.; /* Copy X into x on the diagonal processes. */ B_col = &B[j*ldb]; X_col = &X[j*ldx]; for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = ilsum[lk] + (lk+1)*XK_H; jj = FstBlockC( k ); for (i = 0; i < knsupc; ++i) x_trs[i+ii] = X_col[i+jj]; dx_trs[ii-XK_H].r = k;/* Block number prepended in header. */ } } } /* Copy B into b distributed the same way as matrix-vector product. */ if ( N_update ) ii = update[0]; for (i = 0; i < N_update; ++i) b[i] = B_col[i + ii]; while (1) { /* Loop until stopping criterion is satisfied. */ /* Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ /* Matrix-vector multiply. */ pzgsmv_AXglobal(N_update, update, val, bindx, X_col, ax); /* Compute residual. */ for (i = 0; i < N_update; ++i) z_sub(&R[i], &b[i], &ax[i]); /* Compute abs(op(A))*abs(X) + abs(B). */ pzgsmv_AXglobal_abs(N_update, update, val, bindx, X_col, rwork); for (i = 0; i < N_update; ++i) rwork[i] += slud_z_abs1(&b[i]); s = 0.0; for (i = 0; i < N_update; ++i) { if ( rwork[i] > safe2 ) { s = SUPERLU_MAX(s, slud_z_abs1(&R[i]) / rwork[i]); } else if ( rwork[i] != 0.0 ) { s = SUPERLU_MAX(s, (safe1 + slud_z_abs1(&R[i])) / rwork[i]); } /* If temp[i] is exactly 0.0 (computed by PxGSMV), then we know the true residual also must be exactly 0.0. */ } MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm ); #if ( PRNTlevel>= 1 ) if ( !iam ) printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]); #endif if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) { /* Compute new dx. 
*/ redist_all_to_diag(n, R, Glu_persist, Llu, grid, mv_sup_to_proc, dx_trs); pzgstrs1(n, LUstruct, grid, dx_trs, 1, stat, info); /* Update solution. */ for (p = 0; p < num_diag_procs; ++p) if ( iam == diag_procs[p] ) for (k = p; k < nsupers; k += num_diag_procs) { lk = LBi( k, grid ); ii = ilsum[lk] + (lk+1)*XK_H; knsupc = SuperSize( k ); for (i = 0; i < knsupc; ++i) z_add(&x_trs[i + ii], &x_trs[i + ii], &dx_trs[i + ii]); } lstres = berr[j]; ++count; /* Transfer x_trs (on diagonal processes) into X (on all processes). */ gather_1rhs_diag_to_all(n, x_trs, Glu_persist, Llu, grid, num_diag_procs, diag_procs, diag_len, X_col, temp); } else { break; } } /* end while */ stat->RefineSteps = count; } /* for j ... */ /* Deallocate storage used by matrix-vector multiplication. */ SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); if ( N_update ) { SUPERLU_FREE(update); SUPERLU_FREE(bindx); SUPERLU_FREE(val); } SUPERLU_FREE(mv_sup_to_proc); SUPERLU_FREE(work); SUPERLU_FREE(rwork); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgsrfs_ABXglobal()"); #endif } /* PZGSRFS_ABXGLOBAL */ /*! \brief * *
 * r[] is the residual vector distributed the same way as
 * matrix-vector product.
 * 
*/
/*
 * Walk over all supernodes.  For supernode k, the process recorded in
 * mv_sup_to_proc[k] holds the next SuperSize(k) entries of r[]; those
 * entries are delivered into work[] on the process at grid position
 * (PROW(k), PCOL(k)), either by a local copy (same process) or by a
 * blocking MPI_Send / MPI_Recv pair tagged Xk.
 *
 * n               order used to look up the number of supernodes
 * r               locally held pieces of the vector, packed back to back
 * mv_sup_to_proc  for each supernode, the rank holding its piece of r[]
 * work            destination buffer; a piece lands at
 *                 ilsum[lk] + (lk+1)*XK_H with lk = LBi(k, grid)
 */
static void
redist_all_to_diag(int_t n, doublecomplex r[],
                   Glu_persist_t *Glu_persist, LocalLU_t *Llu,
                   gridinfo_t *grid, int_t mv_sup_to_proc[],
                   doublecomplex work[])
{
    /* NOTE(review): xsup is never named below; it is presumably picked up
       by the SuperSize() macro, so the identifier must not be renamed. */
    int_t *xsup = Glu_persist->xsup;
    int_t *ilsum = Llu->ilsum;
    int_t nsupers = Glu_persist->supno[n-1] + 1;
    int   iam = grid->iam;
    int_t r_pos = 0;  /* offset in r[] of the next piece held by this process */
    int_t sn;
    MPI_Status status;

    for (sn = 0; sn < nsupers; ++sn) {
        int   dest_rank = PNUM( PROW( sn, grid ), PCOL( sn, grid ), grid );
        int   src_rank  = mv_sup_to_proc[sn];
        int   width     = SuperSize( sn );
        int_t lblk      = LBi( sn, grid );
        int_t w_pos     = ilsum[lblk] + (lblk+1)*XK_H;
        int_t t;

        if ( iam != src_rank ) {
            /* Not the holder: only the destination has anything to do. */
            if ( iam == dest_rank ) {
                /* Recv X component. */
                MPI_Recv( &work[w_pos], width, SuperLU_MPI_DOUBLE_COMPLEX,
                          src_rank, Xk, grid->comm, &status );
            }
            continue;
        }

        if ( iam == dest_rank ) {
            /* Holder and destination coincide: local copy. */
            for (t = 0; t < width; ++t)
                work[t + w_pos] = r[t + r_pos];
        } else {
            /* Send X component. */
            MPI_Send( &r[r_pos], width, SuperLU_MPI_DOUBLE_COMPLEX,
                      dest_rank, Xk, grid->comm );
        }
        r_pos += width;
    }
} /* REDIST_ALL_TO_DIAG */

/*! \brief
 *
 *
 * Gather the components of x vector on the diagonal processes
 * onto all processes, and combine them into the global vector y.
 * 
*/ static void gather_1rhs_diag_to_all(int_t n, doublecomplex x[], Glu_persist_t *Glu_persist, LocalLU_t *Llu, gridinfo_t *grid, int_t num_diag_procs, int_t diag_procs[], int_t diag_len[], doublecomplex y[], doublecomplex work[]) { int_t i, ii, k, lk, lwork, nsupers, p; int_t *ilsum, *xsup; int iam, knsupc, pkk; iam = grid->iam; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; ilsum = Llu->ilsum; for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { /* Copy x vector into a buffer. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = ilsum[lk] + (lk+1)*XK_H; for (i = 0; i < knsupc; ++i) work[i+lwork] = x[i+ii]; lwork += knsupc; } MPI_Bcast( work, lwork, SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm ); } else { MPI_Bcast( work, diag_len[p], SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm ); } /* Scatter work[] into global y vector. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); ii = FstBlockC( k ); for (i = 0; i < knsupc; ++i) y[i+ii] = work[i+lwork]; lwork += knsupc; } } } /* GATHER_1RHS_DIAG_TO_ALL */ SuperLU_DIST_5.3.0/SRC/zlangs_dist.c0000644013363400111340000000644713233431301015763 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Returns the one norm, or the Frobenius norm, or the infinity norm, or the element of largest value */ /* * File name: zlangs.c * History: Modified from lapack routine ZLANGE */ #include #include "superlu_zdefs.h" /*! \brief
 
    Purpose   
    =======   

    ZLANGS_DIST returns the value of the one norm, or the Frobenius norm, or 
    the infinity norm, or the element of largest absolute value of a 
    complex sparse matrix A.   

    Description   
    ===========   

    ZLANGE returns the value   

       ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
                (   
                ( norm1(A),         NORM = '1', 'O' or 'o'   
                (   
                ( normI(A),         NORM = 'I' or 'i'   
                (   
                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   

    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   

    Arguments   
    =========   

    NORM    (input) CHARACTER*1   
            Specifies the value to be returned in ZLANGE as described above.   
    A       (input) SuperMatrix*
            The M by N sparse matrix A. 

   ===================================================================== 
*/ double zlangs_dist(char *norm, SuperMatrix *A) { /* Local variables */ NCformat *Astore; doublecomplex *Aval; int i, j, irow; double value=0., sum; double *rwork; Astore = A->Store; Aval = Astore->nzval; if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) { value = 0.; } else if ( strncmp(norm, "M", 1)==0 ) { /* Find max(abs(A(i,j))). */ value = 0.; for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) value = SUPERLU_MAX( value, slud_z_abs( &Aval[i]) ); } else if ( strncmp(norm, "O", 1)==0 || *(unsigned char *)norm == '1') { /* Find norm1(A). */ value = 0.; for (j = 0; j < A->ncol; ++j) { sum = 0.; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) sum += slud_z_abs( &Aval[i] ); value = SUPERLU_MAX(value,sum); } } else if ( strncmp(norm, "I", 1)==0 ) { /* Find normI(A). */ if ( !(rwork = (double *) SUPERLU_MALLOC(A->nrow * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for rwork."); for (i = 0; i < A->nrow; ++i) rwork[i] = 0.; for (j = 0; j < A->ncol; ++j) for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) { irow = Astore->rowind[i]; rwork[irow] += slud_z_abs( &Aval[i] ); } value = 0.; for (i = 0; i < A->nrow; ++i) value = SUPERLU_MAX(value, rwork[i]); SUPERLU_FREE (rwork); } else if ( strncmp(norm, "F", 1)==0 || strncmp(norm, "E", 1)==0 ) { /* Find normF(A). */ ABORT("Not implemented."); } else ABORT("Illegal norm specified."); return (value); } /* zlangs_dist */ SuperLU_DIST_5.3.0/SRC/zreadMM.c0000644013363400111340000001361313233431301014772 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * Contributed by Francois-Henry Rouet. * */ #include #include #include "superlu_zdefs.h" #undef EXPAND_SYM /*! brief * *
 * Output parameters
 * =================
 *   (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of
 *      nonzeros in columns of matrix A; (*nzval)[*] the numerical values;
 *      column i of A is given by (*nzval)[k], k = (*colptr)[i],...,
 *      (*colptr)[i+1]-1.
 * 
*/ void zreadMM_dist(FILE *fp, int_t *m, int_t *n, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { int_t j, k, jsize, nnz, nz, new_nonz; doublecomplex *a, *val; int_t *asub, *xa, *row, *col; int_t zero_base = 0; char *p, line[512], banner[64], mtx[64], crd[64], arith[64], sym[64]; int expand; char *cs; /* File format: * %%MatrixMarket matrix coordinate real general/symmetric/... * % ... * % (optional comments) * % ... * #rows #non-zero * Triplet in the rest of lines: row col value */ /* 1/ read header */ cs = fgets(line,512,fp); for (p=line; *p!='\0'; *p=tolower(*p),p++); if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, arith, sym) != 5) { printf("Invalid header (first line does not contain 5 tokens)\n"); exit; } if(strcmp(banner,"%%matrixmarket")) { printf("Invalid header (first token is not \"%%%%MatrixMarket\")\n"); exit(-1); } if(strcmp(mtx,"matrix")) { printf("Not a matrix; this driver cannot handle that.\n"); exit(-1); } if(strcmp(crd,"coordinate")) { printf("Not in coordinate format; this driver cannot handle that.\n"); exit(-1); } if(strcmp(arith,"complex")) { if(!strcmp(arith,"real")) { printf("Complex matrix; use dreadMM instead!\n"); exit(-1); } else if(!strcmp(arith, "pattern")) { printf("Pattern matrix; values are needed!\n"); exit(-1); } else { printf("Unknown arithmetic\n"); exit(-1); } } if(strcmp(sym,"general")) { printf("Symmetric matrix: will be expanded\n"); expand=1; } else expand=0; /* 2/ Skip comments */ while(banner[0]=='%') { cs = fgets(line,512,fp); sscanf(line,"%s",banner); } /* 3/ Read n and nnz */ #ifdef _LONGINT sscanf(line, "%ld%ld%ld",m, n, nonz); #else sscanf(line, "%d%d%d",m, n, nonz); #endif if(*m!=*n) { printf("Rectangular matrix!. 
Abort\n"); exit(-1); } if(expand) new_nonz = 2 * *nonz - *n; else new_nonz = *nonz; *m = *n; printf("m %lld, n %lld, nonz %lld\n", (long long) *m, (long long) *n, (long long) *nonz); fflush(stdout); zallocateA_dist(*n, new_nonz, nzval, rowind, colptr); /* Allocate storage */ a = *nzval; asub = *rowind; xa = *colptr; if ( !(val = doublecomplexMalloc_dist(new_nonz)) ) ABORT("Malloc fails for val[]"); if ( !(row = (int_t *) intMalloc_dist(new_nonz)) ) ABORT("Malloc fails for row[]"); if ( !(col = (int_t *) intMalloc_dist(new_nonz)) ) ABORT("Malloc fails for col[]"); for (j = 0; j < *n; ++j) xa[j] = 0; /* 4/ Read triplets of values */ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { #ifdef _LONGINT j = fscanf(fp, "%lld%lld%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); #else j = fscanf(fp, "%d%d%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); #endif if ( nnz == 0 ) /* first nonzero */ { if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else printf("triplet file: row/col indices are one-based.\n"); fflush(stdout); } if ( !zero_base ) { /* Change to 0-based indexing. 
*/ --row[nz]; --col[nz]; } if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n /*|| val[nz] == 0.*/) { fprintf(stderr, "nz " IFMT ", (" IFMT ", " IFMT ") = {%e\t%e} out of bound, removed\n", nz, row[nz], col[nz], val[nz].r, val[nz].i); exit(-1); } else { ++xa[col[nz]]; if(expand) { if ( row[nz] != col[nz] ) { /* Excluding diagonal */ ++nz; row[nz] = col[nz-1]; col[nz] = row[nz-1]; val[nz] = val[nz-1]; ++xa[col[nz]]; } } ++nz; } } *nonz = nz; if(expand) { printf("new_nonz after symmetric expansion:\t" IFMT "\n", *nonz); fflush(stdout); } /* Initialize the array of column pointers */ k = 0; jsize = xa[0]; xa[0] = 0; for (j = 1; j < *n; ++j) { k += jsize; jsize = xa[j]; xa[j] = k; } /* Copy the triplets into the column oriented storage */ for (nz = 0; nz < *nonz; ++nz) { j = col[nz]; k = xa[j]; asub[k] = row[nz]; a[k] = val[nz]; ++xa[j]; } /* Reset the column pointers to the beginning of each column */ for (j = *n; j > 0; --j) xa[j] = xa[j-1]; xa[0] = 0; SUPERLU_FREE(val); SUPERLU_FREE(row); SUPERLU_FREE(col); #ifdef CHK_INPUT int i; for (i = 0; i < *n; i++) { printf("Col %d, xa %d\n", i, xa[i]); for (k = xa[i]; k < xa[i+1]; k++) printf("%d\t%16.10f\n", asub[k], a[k]); } #endif } static void zreadrhs(int m, doublecomplex *b) { FILE *fp, *fopen(); int i; if ( !(fp = fopen("b.dat", "r")) ) { fprintf(stderr, "zreadrhs: file does not exist\n"); exit(-1); } for (i = 0; i < m; ++i) i = fscanf(fp, "%lf%lf\n", &b[i].r, &b[i].i); fclose(fp); } SuperLU_DIST_5.3.0/SRC/zreadhb.c0000644013363400111340000002103513233431301015047 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include "dcomplex.h" #include #include #include "superlu_zdefs.h" /* * Prototypes */ static void ReadVector(FILE *, int_t, int_t *, int_t, int_t); static void zReadValues(FILE *, int_t, doublecomplex *, int_t, int_t); static int DumpLine(FILE *); static int ParseIntFormat(char *, int_t *, int_t *); static int ParseFloatFormat(char *, int_t *, int_t *); /*! \brief * *
 * Purpose
 * =======
 * 
 * Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format 
 * as described below.
 * 
 * Line 1 (A72,A8) 
 *  	Col. 1 - 72   Title (TITLE) 
 *	Col. 73 - 80  Key (KEY) 
 * 
 * Line 2 (5I14) 
 * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
 * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
 * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
 * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
 *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
 *                    (including starting guesses and solution vectors 
 *		       if present) 
 *           	      (zero indicates no right-hand side data is present) 
 *
 * Line 3 (A3, 11X, 4I14) 
 *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
 * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
 * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
 *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
 *	              (equal to number of entries for assembled matrices) 
 * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
 *	              (zero in the case of assembled matrices) 
 * Line 4 (2A16, 2A20) 
 * 	Col. 1 - 16   Format for pointers (PTRFMT) 
 *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
 *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
 * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
 *
 * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
 *    	Col. 1 	      Right-hand side type: 
 *	         	  F for full storage or M for same format as matrix 
 *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
 *    	Col. 3        X if an exact solution vector(s) is supplied. 
 *	Col. 15 - 28  Number of right-hand sides (NRHS) 
 *	Col. 29 - 42  Number of row indices (NRHSIX) 
 *          	      (ignored in case of unassembled matrices) 
 *
 * The three character type field on line 3 describes the matrix type. 
 * The following table lists the permitted values for each of the three 
 * characters. As an example of the type field, RSA denotes that the matrix 
 * is real, symmetric, and assembled. 
 *
 * First Character: 
 *	R Real matrix 
 *	C Complex matrix 
 *	P Pattern only (no numerical values supplied) 
 *
 * Second Character: 
 *	S Symmetric 
 *	U Unsymmetric 
 *	H Hermitian 
 *	Z Skew symmetric 
 *	R Rectangular 
 *
 * Third Character: 
 *	A Assembled 
 *	E Elemental matrices (unassembled) 
 * 
*/ void zreadhb_dist(int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { register int_t i, numer_lines, rhscrd = 0; int_t tmp, colnum, colsize, rownum, rowsize, valnum, valsize; char buf[100], type[4]; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Enter zreadhb_dist()"); #endif /* Line 1 */ fgets(buf, 100, fp); /* Line 2 */ for (i=0; i<5; i++) { fscanf(fp, "%14c", buf); buf[14] = 0; tmp = atoi(buf); /*sscanf(buf, "%d", &tmp);*/ if (i == 3) numer_lines = tmp; if (i == 4 && tmp) rhscrd = tmp; } DumpLine(fp); /* Line 3 */ fscanf(fp, "%3c", type); fscanf(fp, "%11c", buf); /* pad */ type[3] = 0; #if ( DEBUGlevel>=1 ) if ( !iam ) printf("Matrix type %s\n", type); #endif fscanf(fp, "%14c", buf); *nrow = atoi(buf); fscanf(fp, "%14c", buf); *ncol = atoi(buf); fscanf(fp, "%14c", buf); *nonz = atoi(buf); fscanf(fp, "%14c", buf); tmp = atoi(buf); if (tmp != 0) if ( !iam ) printf("This is not an assembled matrix!\n"); if (*nrow != *ncol) if ( !iam ) printf("Matrix is not square.\n"); DumpLine(fp); /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */ zallocateA_dist(*ncol, *nonz, nzval, rowind, colptr); /* Line 4: format statement */ fscanf(fp, "%16c", buf); ParseIntFormat(buf, &colnum, &colsize); fscanf(fp, "%16c", buf); ParseIntFormat(buf, &rownum, &rowsize); fscanf(fp, "%20c", buf); ParseFloatFormat(buf, &valnum, &valsize); fscanf(fp, "%20c", buf); DumpLine(fp); /* Line 5: right-hand side */ if ( rhscrd ) DumpLine(fp); /* skip RHSFMT */ #if ( DEBUGlevel>=1 ) if ( !iam ) { printf(IFMT " rows, " IFMT " nonzeros\n", *nrow, *nonz); printf("colnum " IFMT ", colsize " IFMT "\n", colnum, colsize); printf("rownum " IFMT ", rowsize " IFMT "\n", rownum, rowsize); printf("valnum " IFMT ", valsize " IFMT "\n", valnum, valsize); } #endif ReadVector(fp, *ncol+1, *colptr, colnum, colsize); #if ( DEBUGlevel>=1 ) if ( !iam ) printf("read colptr[" IFMT "] = " IFMT "\n", *ncol, (*colptr)[*ncol]); #endif ReadVector(fp, 
*nonz, *rowind, rownum, rowsize); #if ( DEBUGlevel>=1 ) if ( !iam ) printf("read rowind[" IFMT "] = " IFMT "\n", *nonz-1, (*rowind)[*nonz-1]); #endif if ( numer_lines ) { zReadValues(fp, *nonz, *nzval, valnum, valsize); } fclose(fp); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Exit zreadhb_dist()"); #endif } /* Eat up the rest of the current line */ static int DumpLine(FILE *fp) { register int c; while ((c = fgetc(fp)) != '\n') ; return 0; } static int ParseIntFormat(char *buf, int_t *num, int_t *size) { char *tmp; tmp = buf; while (*tmp++ != '(') ; *num = atoi(tmp); while (*tmp != 'I' && *tmp != 'i') ++tmp; ++tmp; *size = atoi(tmp); return 0; } static int ParseFloatFormat(char *buf, int_t *num, int_t *size) { char *tmp, *period; tmp = buf; while (*tmp++ != '(') ; *num = atoi(tmp); while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd' && *tmp != 'F' && *tmp != 'f') { /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the num picked up refers to P, which should be skipped. */ if (*tmp=='p' || *tmp=='P') { ++tmp; *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ } else { ++tmp; } } ++tmp; period = tmp; while (*period != '.' && *period != ')') ++period ; *period = '\0'; *size = atoi(tmp); return 0; } static void ReadVector(FILE *fp, int_t n, int_t *where, int_t perline, int_t persize) { register int_t i, j, item; char tmp, buf[100]; i = 0; while (i < n) { fgets(buf, 100, fp); /* read a line at a time */ for (j=0; j * -- Distributed SuperLU routine (version 4.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * August 15, 2014 * *
* * Purpose * ======= * * Read a DOUBLE COMPLEX PRECISION matrix stored in Rutherford-Boeing format * as described below. * * Line 1 (A72, A8) * Col. 1 - 72 Title (TITLE) * Col. 73 - 80 Matrix name / identifier (MTRXID) * * Line 2 (I14, 3(1X, I13)) * Col. 1 - 14 Total number of lines excluding header (TOTCRD) * Col. 16 - 28 Number of lines for pointers (PTRCRD) * Col. 30 - 42 Number of lines for row (or variable) indices (INDCRD) * Col. 44 - 56 Number of lines for numerical values (VALCRD) * * Line 3 (A3, 11X, 4(1X, I13)) * Col. 1 - 3 Matrix type (see below) (MXTYPE) * Col. 15 - 28 Compressed Column: Number of rows (NROW) * Elemental: Largest integer used to index variable (MVAR) * Col. 30 - 42 Compressed Column: Number of columns (NCOL) * Elemental: Number of element matrices (NELT) * Col. 44 - 56 Compressed Column: Number of entries (NNZERO) * Elemental: Number of variable indeces (NVARIX) * Col. 58 - 70 Compressed Column: Unused, explicitly zero * Elemental: Number of elemental matrix entries (NELTVL) * * Line 4 (2A16, A20) * Col. 1 - 16 Fortran format for pointers (PTRFMT) * Col. 17 - 32 Fortran format for row (or variable) indices (INDFMT) * Col. 33 - 52 Fortran format for numerical values of coefficient matrix * (VALFMT) * (blank in the case of matrix patterns) * * The three character type field on line 3 describes the matrix type. * The following table lists the permitted values for each of the three * characters. As an example of the type field, RSA denotes that the matrix * is real, symmetric, and assembled. * * First Character: * R Real matrix * C Complex matrix * I integer matrix * P Pattern only (no numerical values supplied) * Q Pattern only (numerical values supplied in associated auxiliary value * file) * * Second Character: * S Symmetric * U Unsymmetric * H Hermitian * Z Skew symmetric * R Rectangular * * Third Character: * A Compressed column form * E Elemental form * *
*/ #include #include #include "superlu_zdefs.h"

/*! \brief Eat up the rest of the current line.
 *
 * Also stops at end of file or on a read error, so that a truncated file
 * cannot hang the reader.  Always returns 0.
 */
static int DumpLine(FILE *fp)
{
    int c;

    do {
        c = fgetc(fp);
    } while (c != '\n' && c != EOF);
    return 0;
}

/*
 * Parse a Fortran integer format such as "(16I5)": *num receives the
 * repeat count that follows '(' and *size the field width that follows
 * 'I'/'i'.  Scanning stops at the first NUL byte; pointer and index
 * formats are mandatory, so a string without '(' or 'I' aborts rather
 * than running off the end of the buffer.  Returns 0.
 */
static int ParseIntFormat(char *buf, int_t *num, int_t *size)
{
    char *tmp = buf;

    while (*tmp != '\0' && *tmp != '(') ++tmp;
    if (*tmp == '\0') {
        ABORT("Malformed integer format in matrix file header.");
    }
    ++tmp;
    *num = atoi(tmp);

    while (*tmp != '\0' && *tmp != 'I' && *tmp != 'i') ++tmp;
    if (*tmp == '\0') {
        ABORT("Malformed integer format in matrix file header.");
    }
    ++tmp;
    *size = atoi(tmp);
    return 0;
}

/*
 * Parse a Fortran real format such as "(4E20.12)" or "(1P6F13.6)": *num
 * receives the repeat count and *size the field width (the text between
 * the E/D/F letter and the following '.' or ')').  buf is modified: the
 * terminating '.'/')' is overwritten with NUL.
 *
 * Returns 0 on success.  If no '(' or no E/D/F letter is found before the
 * first NUL byte (for instance a blank value format), returns -1 with
 * *num = *size = 0.
 */
static int ParseFloatFormat(char *buf, int_t *num, int_t *size)
{
    char *tmp = buf, *period;

    *num = 0;
    *size = 0;

    while (*tmp != '\0' && *tmp != '(') ++tmp;
    if (*tmp == '\0') return -1;
    ++tmp;
    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/

    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
	   && *tmp != 'F' && *tmp != 'f') {
        if (*tmp == '\0') {
            *num = 0;
            return -1;
        }
        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
           num picked up refers to P, which should be skipped. */
        if (*tmp=='p' || *tmp=='P') {
           ++tmp;
           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
        } else {
           ++tmp;
        }
    }
    ++tmp;
    period = tmp;
    while (*period != '\0' && *period != '.' && *period != ')') ++period;
    *period = '\0';
    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/

    return 0;
}

static int ReadVector(FILE *fp, int_t n, int_t *where, int_t perline, int_t persize) { register int_t i, j, item; char tmp, buf[100]; i = 0; while (i < n) { fgets(buf, 100, fp); /* read a line at a time */ for (j=0; j * On input, nonz/nzval/rowind/colptr represents lower part of a symmetric * matrix. On exit, it represents the full matrix with lower and upper parts. *
*/ static void FormFullA(int_t n, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { register int_t i, j, k, col, new_nnz; int_t *t_rowind, *t_colptr, *al_rowind, *al_colptr, *a_rowind, *a_colptr; int_t *marker; doublecomplex *t_val, *al_val, *a_val; al_rowind = *rowind; al_colptr = *colptr; al_val = *nzval; if ( !(marker = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for marker[]"); if ( !(t_colptr = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC t_colptr[]"); if ( !(t_rowind = (int_t *) SUPERLU_MALLOC( *nonz * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for t_rowind[]"); if ( !(t_val = (doublecomplex*) SUPERLU_MALLOC( *nonz * sizeof(doublecomplex)) ) ) ABORT("SUPERLU_MALLOC fails for t_val[]"); /* Get counts of each column of T, and set up column pointers */ for (i = 0; i < n; ++i) marker[i] = 0; for (j = 0; j < n; ++j) { for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) ++marker[al_rowind[i]]; } t_colptr[0] = 0; for (i = 0; i < n; ++i) { t_colptr[i+1] = t_colptr[i] + marker[i]; marker[i] = t_colptr[i]; } /* Transpose matrix A to T */ for (j = 0; j < n; ++j) for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) { col = al_rowind[i]; t_rowind[marker[col]] = j; t_val[marker[col]] = al_val[i]; ++marker[col]; } new_nnz = *nonz * 2 - n; if ( !(a_colptr = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC a_colptr[]"); if ( !(a_rowind = (int_t *) SUPERLU_MALLOC( new_nnz * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for a_rowind[]"); if ( !(a_val = (doublecomplex*) SUPERLU_MALLOC( new_nnz * sizeof(doublecomplex)) ) ) ABORT("SUPERLU_MALLOC fails for a_val[]"); a_colptr[0] = 0; k = 0; for (j = 0; j < n; ++j) { for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) { if ( t_rowind[i] != j ) { /* not diagonal */ a_rowind[k] = t_rowind[i]; a_val[k] = t_val[i]; ++k; } } for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) { a_rowind[k] = al_rowind[i]; a_val[k] = al_val[i]; ++k; } 
a_colptr[j+1] = k; } printf("FormFullA: new_nnz = " IFMT ", k = " IFMT "\n", new_nnz, k); SUPERLU_FREE(al_val); SUPERLU_FREE(al_rowind); SUPERLU_FREE(al_colptr); SUPERLU_FREE(marker); SUPERLU_FREE(t_val); SUPERLU_FREE(t_rowind); SUPERLU_FREE(t_colptr); *nzval = a_val; *rowind = a_rowind; *colptr = a_colptr; *nonz = new_nnz; } void zreadrb_dist(int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { register int_t i, numer_lines = 0; int_t tmp, colnum, colsize, rownum, rowsize, valnum, valsize; char buf[100], type[4]; int sym; /* Line 1 */ fgets(buf, 100, fp); fputs(buf, stdout); /* Line 2 */ for (i=0; i<4; i++) { fscanf(fp, "%14c", buf); buf[14] = 0; tmp = atoi(buf); /*sscanf(buf, "%d", &tmp);*/ if (i == 3) numer_lines = tmp; } DumpLine(fp); /* Line 3 */ fscanf(fp, "%3c", type); fscanf(fp, "%11c", buf); /* pad */ type[3] = 0; #if (DEBUGlevel >= 1) if ( !iam ) printf("Matrix type %s\n", type); #endif fscanf(fp, "%14c", buf); *nrow = atoi(buf); fscanf(fp, "%14c", buf); *ncol = atoi(buf); fscanf(fp, "%14c", buf); *nonz = atoi(buf); fscanf(fp, "%14c", buf); tmp = atoi(buf); if (tmp != 0) if ( !iam ) printf("This is not an assembled matrix!\n"); if (*nrow != *ncol) if ( !iam ) printf("Matrix is not square.\n"); DumpLine(fp); /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */ zallocateA_dist(*ncol, *nonz, nzval, rowind, colptr); /* Line 4: format statement */ fscanf(fp, "%16c", buf); ParseIntFormat(buf, &colnum, &colsize); fscanf(fp, "%16c", buf); ParseIntFormat(buf, &rownum, &rowsize); fscanf(fp, "%20c", buf); ParseFloatFormat(buf, &valnum, &valsize); DumpLine(fp); #if (DEBUGlevel >= 1) if ( !iam ) { printf(IFMT " rows, " IFMT " nonzeros\n", *nrow, *nonz); printf("colnum " IFMT ", colsize " IFMT "\n", colnum, colsize); printf("rownum " IFMT ", rowsize " IFMT "\n", rownum, rowsize); printf("valnum " IFMT ", valsize " IFMT "\n", valnum, valsize); } #endif ReadVector(fp, *ncol+1, *colptr, colnum, 
colsize); ReadVector(fp, *nonz, *rowind, rownum, rowsize); if ( numer_lines ) { zReadValues(fp, *nonz, *nzval, valnum, valsize); } sym = (type[1] == 'S' || type[1] == 's'); if ( sym ) { FormFullA(*ncol, nonz, nzval, rowind, colptr); } fclose(fp); } SuperLU_DIST_5.3.0/SRC/dldperm_dist.c0000644013363400111340000001323513233431301016105 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Finds a row permutation so that the matrix has large entries on the diagonal * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include "superlu_ddefs.h" extern void mc64ad_dist(int_t*, int_t*, int_t*, int_t [], int_t [], double [], int_t*, int_t [], int_t*, int_t[], int_t*, double [], int_t [], int_t []); /*! \brief * *
 * Purpose
 * =======
 *
 *   DLDPERM finds a row permutation so that the matrix has large
 *   entries on the diagonal.
 *
 * Arguments
 * =========
 *
 * job    (input) int
 *        Control the action. Possible values for JOB are:
 *        = 1 : Compute a row permutation of the matrix so that the
 *              permuted matrix has as many entries on its diagonal as
 *              possible. The values on the diagonal are of arbitrary size.
 *              HSL subroutine MC21A/AD is used for this.
 *        = 2 : Compute a row permutation of the matrix so that the smallest 
 *              value on the diagonal of the permuted matrix is maximized.
 *        = 3 : Compute a row permutation of the matrix so that the smallest
 *              value on the diagonal of the permuted matrix is maximized.
 *              The algorithm differs from the one used for JOB = 2 and may
 *              have quite a different performance.
 *        = 4 : Compute a row permutation of the matrix so that the sum
 *              of the diagonal entries of the permuted matrix is maximized.
 *        = 5 : Compute a row permutation of the matrix so that the product
 *              of the diagonal entries of the permuted matrix is maximized
 *              and vectors to scale the matrix so that the nonzero diagonal 
 *              entries of the permuted matrix are one in absolute value and 
 *              all the off-diagonal entries are less than or equal to one in 
 *              absolute value.
 *        Restriction: 1 <= JOB <= 5.
 *
 * n      (input) int
 *        The order of the matrix.
 *
 * nnz    (input) int
 *        The number of nonzeros in the matrix.
 *
 * adjncy (input) int*, of size nnz
 *        The adjacency structure of the matrix, which contains the row
 *        indices of the nonzeros.
 *
 * colptr (input) int*, of size n+1
 *        The pointers to the beginning of each column in ADJNCY.
 *
 * nzval  (input) double*, of size nnz
 *        The nonzero values of the matrix. nzval[k] is the value of
 *        the entry corresponding to adjncy[k].
 *        It is not used if job = 1.
 *
 * perm   (output) int*, of size n
 *        The permutation vector. perm[i] = j means row i in the
 *        original matrix is in row j of the permuted matrix.
 *
 * u      (output) double*, of size n
 *        If job = 5, the natural logarithms of the row scaling factors. 
 *
 * v      (output) double*, of size n
 *        If job = 5, the natural logarithms of the column scaling factors. 
 *        The scaled matrix B has entries b_ij = a_ij * exp(u_i + v_j).
 * 
*/ int dldperm_dist(int_t job, int_t n, int_t nnz, int_t colptr[], int_t adjncy[], double nzval[], int_t *perm, double u[], double v[]) { int_t i, liw, ldw, num; int_t *iw, icntl[10], info[10]; double *dw; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Enter dldperm_dist()"); #endif liw = 5*n; if ( job == 3 ) liw = 10*n + nnz; if ( !(iw = intMalloc_dist(liw)) ) ABORT("Malloc fails for iw[]"); ldw = 3*n + nnz; if ( !(dw = doubleMalloc_dist(ldw)) ) ABORT("Malloc fails for dw[]"); /* Increment one to get 1-based indexing. */ for (i = 0; i <= n; ++i) ++colptr[i]; for (i = 0; i < nnz; ++i) ++adjncy[i]; #if ( DEBUGlevel>=2 ) printf("LDPERM(): n %d, nnz %d\n", n, nnz); PrintInt10("colptr", n+1, colptr); PrintInt10("adjncy", nnz, adjncy); #endif /* * NOTE: * ===== * * MC64AD assumes that column permutation vector is defined as: * perm(i) = j means column i of permuted A is in column j of original A. * * Since a symmetric permutation preserves the diagonal entries. Then * by the following relation: * P'(A*P')P = P'A * we can apply inverse(perm) to rows of A to get large diagonal entries. * But, since 'perm' defined in MC64AD happens to be the reverse of * SuperLU's definition of permutation vector, therefore, it is already * an inverse for our purpose. We will thus use it directly. * */ mc64id_dist(icntl); /* Suppress error and warning messages. */ icntl[0] = -1; icntl[1] = -1; mc64ad_dist(&job, &n, &nnz, colptr, adjncy, nzval, &num, perm, &liw, iw, &ldw, dw, icntl, info); #if ( DEBUGlevel>=2 ) PrintInt10("perm", n, perm); printf(".. After MC64AD info %d\tsize of matching %d\n", info[0], num); #endif if ( info[0] == 1 ) { /* Structurally singular */ printf(".. The last " IFMT " permutations:\n", n-num); PrintInt10("perm", n-num, &perm[num]); } /* Restore to 0-based indexing. 
*/ for (i = 0; i <= n; ++i) --colptr[i]; for (i = 0; i < nnz; ++i) --adjncy[i]; for (i = 0; i < n; ++i) --perm[i]; if ( job == 5 ) for (i = 0; i < n; ++i) { u[i] = dw[i]; v[i] = dw[n+i]; } SUPERLU_FREE(iw); SUPERLU_FREE(dw); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Exit dldperm_dist()"); #endif return (info[0]); } SuperLU_DIST_5.3.0/SRC/dscatter.c0000644013363400111340000003570213233431301015247 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Scatter the computed blocks into LU destination. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 * Modified: 
 *   September 18, 2017, enable SIMD vectorized scatter operation.
 *   
 */
#include 
#include "superlu_ddefs.h"

/*
 * Subtract the dense update block stored in tempv[] from the destination
 * block of L identified by (ib, ljb).
 *
 * The destination block is found by walking the block descriptors stored in
 * Lrowind_bc_ptr[ljb] until the one whose block number equals ib is reached.
 * Row indices inside an L block are not sorted, so indirect_thread[] is
 * filled with a map from "row offset relative to FstBlockC(ib)" to "row
 * position inside the destination block" before the update is applied.
 *
 * Column jj of the destination is updated only when klst differs from
 * usub[iukp + jj]; only such columns own a column (of stride nbrow) in
 * tempv[].  The destination pointer always advances by the destination LDA.
 *
 * NOTE(review): xsup and grid are not referenced by name here; FstBlockC is
 * a macro defined elsewhere and presumably reads xsup[] -- confirm.
 */
static void
dscatter_l_1 (int ib,
           int ljb,
           int nsupc,
           int_t iukp,
           int_t* xsup,
           int klst,
           int nbrow,
           int_t lptr,
           int temp_nbrow,
           int * usub,
           int * lsub,
           double *tempv,
           int * indirect_thread,
           int_t ** Lrowind_bc_ptr, double **Lnzval_bc_ptr,
	   gridinfo_t * grid)
{
    int_t *dest_index = Lrowind_bc_ptr[ljb];
    const int_t dest_lda = dest_index[1];   /* LDA of the destination lusup. */
    int_t blk_ptr = BC_HEADER;              /* cursor into dest_index[] */
    int_t val_off = 0;                      /* row offset of the block in nzval */
    int_t row, col;

    /* Blocks are not ordered: scan the descriptors until block ib shows up. */
    for (; dest_index[blk_ptr] != ib;
         blk_ptr += LB_DESCRIPTOR + dest_index[blk_ptr + 1])
    {
        val_off += dest_index[blk_ptr + 1];
    }

    /* Step over the descriptor; blk_ptr now addresses the block's row list
       and dest_index[blk_ptr - 1] holds the number of rows in that list. */
    const int_t first_row = FstBlockC (ib);
    blk_ptr += LB_DESCRIPTOR;

    /* Build the indirection table: relative row -> position in destination. */
    for (row = 0; row < dest_index[blk_ptr - 1]; ++row)
    {
        indirect_thread[dest_index[blk_ptr + row] - first_row] = row;
    }

    double *dest_col = Lnzval_bc_ptr[ljb] + val_off;

    for (col = 0; col < nsupc; ++col, dest_col += dest_lda)
    {
        /* An empty segment contributes nothing and owns no tempv column. */
        if (klst == usub[iukp + col])
            continue;

        for (row = 0; row < temp_nbrow; ++row)
        {
            const int_t rel = lsub[lptr + row] - first_row;
            dest_col[indirect_thread[rel]] -= tempv[row];
#ifdef PI_DEBUG
            /* Prints (column block, location, old value, new value, update). */
            printf ("(%d %d, %0.3e, %0.3e, %3e ) ", ljb,
                    dest_col - Lnzval_bc_ptr[ljb] + indirect_thread[rel],
                    dest_col[indirect_thread[rel]] + tempv[row],
                    dest_col[indirect_thread[rel]],tempv[row]);
#endif
        }
        tempv += nbrow;     /* next column of the source block */
    }
} /* dscatter_l_1 */

/*
 * Scatter (subtract) the dense update block in tempv[] into the destination
 * block L(i,j) stored in Lnzval_bc_ptr[ljb].
 *
 * Steps:
 *   1. Walk the block descriptors in Lrowind_bc_ptr[ljb] to find the block
 *      whose block number is ib.
 *   2. Fill indirect_thread[] with a map from "row offset relative to
 *      FstBlockC(ib)" to "row position in the destination block"; this is
 *      needed because row indices are not sorted inside L blocks.
 *   3. Compose that map with the source row list lsub[lptr ..] into
 *      indirect2[], so the update loop needs a single indirection.
 *   4. For every destination column jj with klst != usub[iukp + jj],
 *      subtract one column of tempv[] (stride nbrow); the destination
 *      pointer always advances by the destination LDA.
 *
 * The per-iteration temporary `rel` is declared inside the vectorised loops
 * so that it is private to each iteration under `#pragma omp simd` rather
 * than a single scalar written by every iteration.
 *
 * NOTE(review): xsup and grid are not referenced by name; FstBlockC is a
 * macro defined elsewhere and presumably reads xsup[] -- confirm.
 */
static void
dscatter_l (
           int ib,    /* row block number of source block L(i,k) */
           int ljb,   /* local column block number of dest. block L(i,j) */
           int nsupc, /* number of columns in destination supernode */
           int_t iukp, /* point to destination supernode's index[] */
           int_t* xsup,
           int klst,
           int nbrow,  /* LDA of the block in tempv[] */
           int_t lptr, /* Input, point to index[] location of block L(i,k) */
	   int temp_nbrow, /* number of rows of source block L(i,k) */
           int_t* usub,
           int_t* lsub,
           double *tempv,
           int* indirect_thread,int* indirect2,
           int_t ** Lrowind_bc_ptr, double **Lnzval_bc_ptr,
           gridinfo_t * grid)
{
    int_t i, jj;
    double *nzval;
    int_t *index = Lrowind_bc_ptr[ljb];
    const int_t ldv = index[1];       /* LDA of the destination lusup. */
    int_t lptrj = BC_HEADER;
    int_t luptrj = 0;

    /* Search for destination block L(i,j); blocks are not ordered. */
    while (index[lptrj] != ib)
    {
        luptrj += index[lptrj + 1];
        lptrj += LB_DESCRIPTOR + index[lptrj + 1];
    }

    /*
     * Build indirect table. This is needed because the indices are not sorted
     * in the L blocks.
     */
    const int_t fnz = FstBlockC (ib);
    lptrj += LB_DESCRIPTOR;                     /* skip descriptor */
    const int_t dest_nbrow = index[lptrj - 1];  /* rows in destination block */

#if (_OPENMP>=201307)
#pragma omp simd
#endif
    for (i = 0; i < dest_nbrow; ++i) {
        const int_t rel = index[lptrj + i] - fnz;
        indirect_thread[rel] = i;
    }

    /* Source index is a subset of dest.  (Could this be precalculated?) */
#if (_OPENMP>=201307)
#pragma omp simd
#endif
    for (i = 0; i < temp_nbrow; ++i) {
        const int_t rel = lsub[lptr + i] - fnz;
        indirect2[i] = indirect_thread[rel];
    }

    nzval = Lnzval_bc_ptr[ljb] + luptrj; /* Destination block L(i,j) */
#ifdef __INTEL_COMPILER
#pragma ivdep
#endif
    for (jj = 0; jj < nsupc; ++jj) {
        const int_t segsize = klst - usub[iukp + jj];
        if (segsize) {
#if (_OPENMP>=201307)
#pragma omp simd
#endif
            for (i = 0; i < temp_nbrow; ++i) {
                nzval[indirect2[i]] -= tempv[i];
            }
            tempv += nbrow;   /* next column of the source block */
        }
        nzval += ldv;         /* next column of the destination block */
    }

} /* dscatter_l */


/*
 * Scatter (subtract) the dense update block in tempv[] into the destination
 * block U(i,j) stored in Unzval_br_ptr[LBi(ib, grid)].
 *
 * The destination block is found by walking the block descriptors of
 * Ufstnz_br_ptr[lib] while the stored block number is smaller than jb.
 * For every column jj of the destination, fnz is read from the index array;
 * when klst != usub[iukp + jj] one column of tempv[] (stride nbrow) is
 * subtracted at row offsets lsub[lptr + i] - fnz.  After each column the
 * value offset advances by ilst - fnz, where ilst = FstBlockC(ib + 1).
 *
 * The per-iteration temporary `rel` is declared inside the vectorised loop
 * so that it is private to each iteration under `#pragma omp simd` rather
 * than a single scalar written by every iteration.
 *
 * NOTE(review): xsup is not referenced by name; FstBlockC and SuperSize are
 * macros defined elsewhere and presumably read xsup[] -- confirm.
 */
static void
dscatter_u (int ib,
           int jb,
           int nsupc,
           int_t iukp,
           int_t * xsup,
           int klst,
 	   int nbrow,      /* LDA of the block in tempv[] */
           int_t lptr,     /* point to index location of block L(i,k) */
	   int temp_nbrow, /* number of rows of source block L(i,k) */
           int_t* lsub,
           int_t* usub,
           double* tempv,
           int_t ** Ufstnz_br_ptr, double **Unzval_br_ptr,
           gridinfo_t * grid)
{
#ifdef PI_DEBUG
    printf ("A(%d,%d) goes to U block \n", ib, jb);
#endif

    int_t jj, i, fnz;
    int segsize;
    double *ucol;
    int_t ilst = FstBlockC (ib + 1);
    int_t lib = LBi (ib, grid);
    int_t *index = Ufstnz_br_ptr[lib];

    /* Start at the beginning of the destination block row:
     * iuip_lib indexes index[], ruip_lib indexes the matching nzval[]. */
    int_t iuip_lib = BR_HEADER;
    int_t ruip_lib = 0;

    int_t ijb = index[iuip_lib];
    while (ijb < jb) {   /* Search for destination block. */
        ruip_lib += index[iuip_lib + 1];
        iuip_lib += UB_DESCRIPTOR + SuperSize (ijb);
        ijb = index[iuip_lib];
    }
    /* Skip descriptor. Now point to fstnz index of block U(i,j). */
    iuip_lib += UB_DESCRIPTOR;

    for (jj = 0; jj < nsupc; ++jj) {
        segsize = klst - usub[iukp + jj];
        fnz = index[iuip_lib++];
        if (segsize) {          /* Nonzero segment in U(k,j). */
            ucol = &Unzval_br_ptr[lib][ruip_lib];

#if (_OPENMP>=201307)
#pragma omp simd
#endif
            for (i = 0; i < temp_nbrow; ++i) {
                const int_t rel = lsub[lptr + i] - fnz;
                ucol[rel] -= tempv[i];

#ifdef PI_DEBUG
                /* Prints (location, old value, new value) unless the update
                 * is bitwise zero.
                 * NOTE(review): the pointer casts below type-pun double as
                 * long; left untouched because it is debug-only code. */
                double zz = 0.0;
                if (!(*(long *) &zz == *(long *) &tempv[i]))
                    printf ("(%d, %0.3e, %0.3e ) ", rel, ucol[rel] + tempv[i],
                            ucol[rel]);
#endif
            } /* for i = 0:temp_nbrow */
            tempv += nbrow; /* Jump LDA to next column */
        }  /* if segsize */

        ruip_lib += ilst - fnz;   /* advance to the next destination column */

    }  /* for jj = 0:nsupc */
} /* dscatter_u */


/* Divide CPU-GPU dgemm work here.
 *
 * File-scope values that exist only in PI_DEBUG builds.  Note that
 * gemm_division_cpu_gpu() and gemm_division_new() declare locals with the
 * same names, which shadow these inside those functions. */
#ifdef PI_DEBUG
int Ngem = 2;
// int_t Ngem = 0;
int min_gpu_col = 6;
#else

    /* Nothing is defined at file scope in non-debug builds. */
    // int_t Ngem = 0;

#endif

#ifdef GPU_ACC

void
gemm_division_cpu_gpu(
    int* num_streams_used,  /*number of streams that will be used */
    int* stream_end_col,    /*array holding last column blk for each partition */
    int * ncpu_blks,        /*Number of CPU dgemm blks */
    /*input */
    int nbrow,              /*number of row in A matrix */
    int ldu,                /*number of k in dgemm */
    int nstreams, 
    int* full_u_cols,       /*array containing prefix sum of work load */
    int num_blks            /*Number of work load */
)
{
    int Ngem = sp_ienv(7);  /*get_mnk_dgemm ();*/
    int min_gpu_col = get_cublas_nb ();

    // Ngem = 1000000000;
    /*
       cpu is to gpu dgemm should be ideally 0:1 ratios to hide the total cost
       However since there is gpu latency of around 20,000 ns implying about
       200000 floating point calculation be done in that time so ~200,000/(2*nbrow*ldu)
       should be done in cpu to hide the latency; we Ngem =200,000/2 
     */
    int i, j;

    // {
    //     *num_streams_used=0;
    //     *ncpu_blks = num_blks;
    //     return;
    // }

    for (int i = 0; i < nstreams; ++i)
    {
        stream_end_col[i] = num_blks;
    }

    *ncpu_blks = 0;
    /*easy returns -1 when number of column are less than threshold */
    if (full_u_cols[num_blks - 1] < (Ngem / (nbrow * ldu)) || num_blks == 1 )
    {
        *num_streams_used = 0;
        *ncpu_blks = num_blks;
#ifdef PI_DEBUG
        printf ("full_u_cols[num_blks-1] %d  %d \n",
                full_u_cols[num_blks - 1], (Ngem / (nbrow * ldu)));
        printf ("Early return \n");
#endif
        return;

    }

    /* Easy return -2 when number of streams =0 */
    if (nstreams == 0)
    {
        *num_streams_used = 0;
        *ncpu_blks = num_blks;
        return;
        /* code */
    }
    /*find first block where count > Ngem */


    for (i = 0; i < num_blks - 1; ++i)  /*I can use binary search here */
    {
        if (full_u_cols[i + 1] > Ngem / (nbrow * ldu))
            break;
    }
    *ncpu_blks = i + 1;

    int_t cols_remain =
        full_u_cols[num_blks - 1] - full_u_cols[*ncpu_blks - 1];

#ifdef PI_DEBUG
    printf ("Remaining cols %d num_blks %d cpu_blks %d \n", cols_remain,
            num_blks, *ncpu_blks);
#endif
    if (cols_remain > 0)
    {
        *num_streams_used = 1;  /* now atleast one stream would be used */

#ifdef PI_DEBUG
        printf ("%d %d  %d %d \n", full_u_cols[num_blks - 1],
                full_u_cols[*ncpu_blks], *ncpu_blks, nstreams);
#endif
        int_t FP_MIN = 200000 / (nbrow * ldu);
        int_t cols_per_stream = SUPERLU_MAX (min_gpu_col, cols_remain / nstreams);
        cols_per_stream = SUPERLU_MAX (cols_per_stream, FP_MIN);
#ifdef PI_DEBUG
        printf ("cols_per_stream :\t%d\n", cols_per_stream);
#endif

        int_t cutoff = cols_per_stream + full_u_cols[*ncpu_blks - 1];
        for (int_t i = 0; i < nstreams; ++i)
        {
            stream_end_col[i] = num_blks;
        }
        j = *ncpu_blks;
        for (i = 0; i < nstreams - 1; ++i)
        {
            int_t st = (i == 0) ? (*ncpu_blks) : stream_end_col[i - 1];

            for (j = st; j < num_blks - 1; ++j)
            {
#ifdef PI_DEBUG
                printf ("i %d, j %d, %d  %d ", i, j, full_u_cols[j + 1],
                        cutoff);
#endif
                if (full_u_cols[j + 1] > cutoff)
                {
#ifdef PI_DEBUG
                    printf ("cutoff met \n");
#endif
                    cutoff = cols_per_stream + full_u_cols[j];
                    stream_end_col[i] = j + 1;
                    *num_streams_used += 1;
                    j++;
                    break;
                }
#ifdef PI_DEBUG
                printf ("\n");
#endif
            }

        }

    }
}

/*
 * Decide how a row of dgemm column blocks is split between the CPU and the
 * GPU streams.  Same policy as gemm_division_cpu_gpu(), but the prefix sum
 * of the work load is read from Ublock_info[b].full_u_cols.
 *
 * The first *ncpu_blks blocks are assigned to the CPU; the remaining blocks
 * are cut into at most nstreams consecutive partitions, partition s ending
 * (exclusively) at block stream_end_col[s].
 *
 * Outputs are always fully defined on return:
 *   *num_streams_used  number of streams that received work (0 = CPU only)
 *   *ncpu_blks         number of leading blocks done on the CPU
 *   stream_end_col[s]  initialised to num_blks for every s < nstreams and
 *                      lowered for each partition boundary that is found
 *
 * Everything is left on the CPU when there is a single block, when no
 * stream is available, when nbrow*ldu is not positive (the threshold
 * Ngem/(nbrow*ldu) would otherwise divide by zero), or when the total
 * number of columns is below that threshold.  With num_blks <= 0 there is
 * nothing to assign and *ncpu_blks is 0.
 *
 * Rationale kept from the original author: the CPU:GPU dgemm ratio would
 * ideally be 0:1, but GPU launch latency (around 20,000 ns, i.e. roughly
 * 200,000 floating point operations) means about 200,000/(2*nbrow*ldu)
 * columns should be done on the CPU to hide it.
 */
void
gemm_division_new (int * num_streams_used,   /*number of streams that will be used */
                   int * stream_end_col, /*array holding last column blk for each partition */
                   int * ncpu_blks,  /*Number of CPU dgemm blks */
                        /*input */
                   int nbrow,    /*number of row in A matrix */
                   int ldu,  /*number of k in dgemm */
                   int nstreams,
                   Ublock_info_t *Ublock_info,    /*array containing prefix sum of work load */
                   int num_blks  /*Number of work load */
    )
{
    int Ngem = sp_ienv(7); /*get_mnk_dgemm ();*/
    int min_gpu_col = get_cublas_nb ();
    int_t i, j;

    /* Defaults, so that no output is ever left unset. */
    *num_streams_used = 0;
    *ncpu_blks = 0;
    for (i = 0; i < nstreams; ++i)
    {
        stream_end_col[i] = num_blks;
    }

    /* Nothing to distribute; also keeps Ublock_info[num_blks - 1] in range. */
    if (num_blks <= 0)
        return;

    /* Work per column, widened so the product cannot overflow int. */
    const long long work_per_col = (long long) nbrow * ldu;
    if (work_per_col <= 0)
    {
        *ncpu_blks = num_blks;
        return;
    }

    /* Minimum number of columns worth offloading. */
    const long long min_cols = Ngem / work_per_col;

    /* Easy return 1: number of columns below the threshold, or one block. */
    if (Ublock_info[num_blks - 1].full_u_cols < min_cols || num_blks == 1)
    {
        *ncpu_blks = num_blks;
        return;
    }

    /* Easy return 2: no stream available. */
    if (nstreams <= 0)
    {
        *ncpu_blks = num_blks;
        return;
    }

    /* Find the first block whose prefix sum exceeds the threshold
       (a binary search could be used here). */
    for (i = 0; i < num_blks - 1; ++i)
    {
        if (Ublock_info[i + 1].full_u_cols > min_cols)
            break;
    }
    *ncpu_blks = i + 1;

    int_t cols_remain =
       Ublock_info [num_blks - 1].full_u_cols - Ublock_info[*ncpu_blks - 1].full_u_cols;

    /* The CPU took everything: *num_streams_used stays 0. */
    if (cols_remain <= 0)
        return;

    *num_streams_used = 1;  /* now at least one stream will be used */

    /* Columns per stream: an even share, but never fewer than the cuBLAS
       block size or the latency-hiding minimum. */
    int_t FP_MIN = (int_t) (200000 / work_per_col);
    int_t cols_per_stream = SUPERLU_MAX (min_gpu_col, cols_remain / nstreams);
    cols_per_stream = SUPERLU_MAX (cols_per_stream, FP_MIN);

    int_t cutoff = cols_per_stream + Ublock_info[*ncpu_blks - 1].full_u_cols;

    /* Every stream but the last gets an explicit end; the last keeps the
       default end (num_blks). */
    for (i = 0; i < nstreams - 1; ++i)
    {
        int_t st = (i == 0) ? (*ncpu_blks) : stream_end_col[i - 1];

        for (j = st; j < num_blks - 1; ++j)
        {
            if (Ublock_info[j + 1].full_u_cols > cutoff)
            {
                cutoff = cols_per_stream + Ublock_info[j].full_u_cols;
                stream_end_col[i] = j + 1;
                *num_streams_used += 1;
                break;
            }
        }
    }
}

#endif  /* defined GPU_ACC */
SuperLU_DIST_5.3.0/SRC/pdgstrf_X1.c0000644013363400111340000011704013233431301015453 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
 * \brief Performs the LU factorization in parallel
 *
 * 
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *
 * Sketch of the algorithm
 * =======================
 *
 * The following relations hold:
 *     * A_kk = L_kk * U_kk
 *     * L_ik = A_ik * U_kk^(-1)
 *     * U_kj = L_kk^(-1) * A_kj
 *
 *              ----------------------------------
 *              |   |                            |
 *              ----|-----------------------------
 *              |   | \ U_kk|                    |
 *              |   |   \   |        U_kj        |
 *              |   |L_kk \ |         ||         |
 *              ----|-------|---------||----------
 *              |   |       |         \/         |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   | L_ik ==>       A_ij        |
 *              |   |       |                    |
 *              |   |       |                    |
 *              |   |       |                    |
 *              ----------------------------------
 *
 * Handle the first block of columns separately.
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity. ( pdgstrf2(0), one column at a time )
 *     * Compute block row of U
 *     * Update trailing matrix
 * 
 * Loop over the remaining blocks of columns.
 *   mycol = MYCOL( iam, grid );
 *   myrow = MYROW( iam, grid );
 *   N = nsupers;
 *   For (k = 1; k < N; ++k) {
 *       krow = PROW( k, grid );
 *       kcol = PCOL( k, grid );
 *       Pkk = PNUM( krow, kcol, grid );
 *
 *     * Factor diagonal and subdiagonal blocks and test for exact
 *       singularity.
 *       if ( mycol == kcol ) {
 *           pdgstrf2(k), one column at a time 
 *       }
 *
 *     * Parallel triangular solve
 *       if ( iam == Pkk ) multicast L_k,k to this process row;
 *       if ( myrow == krow && mycol != kcol ) {
 *          Recv L_k,k from process Pkk;
 *          for (j = k+1; j < N; ++j) 
 *              if ( PCOL( j, grid ) == mycol && A_k,j != 0 )
 *                 U_k,j = L_k,k \ A_k,j;
 *       }
 *
 *     * Parallel rank-k update
 *       if ( myrow == krow ) multicast U_k,k+1:N to this process column;
 *       if ( mycol == kcol ) multicast L_k+1:N,k to this process row;
 *       if ( myrow != krow ) {
 *          Pkj = PNUM( krow, mycol, grid );
 *          Recv U_k,k+1:N from process Pkj;
 *       }
 *       if ( mycol != kcol ) {
 *          Pik = PNUM( myrow, kcol, grid );
 *          Recv L_k+1:N,k from process Pik;
 *       }
 *       for (j = k+1; j < N; ++j) {
 *          for (i = k+1; i < N; ++i) 
 *              if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid )
 *                   && L_i,k != 0 && U_k,j != 0 )
 *                 A_i,j = A_i,j - L_i,k * U_k,j;
 *       }
 *  }
 *
 *
 * Remaining issues
 *   (1) Use local indices for L subscripts and SPA.  [DONE]
 * 
*/ #include #include "superlu_ddefs.h" #define CRAY_X1 #if ( VAMPIR>=1 ) #include #endif /* * Internal prototypes */ static void pdgstrf2(superlu_options_t *, int_t, double, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, int *); #ifdef _CRAY static void pdgstrs2(int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *, _fcd, _fcd, _fcd); #else static void pdgstrs2(int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *); #endif /* * */ /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *
 *  PDGSTRF performs the LU factorization in parallel.
 *
 * Arguments
 * =========
 * 
 * options (input) superlu_options_t*
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following field should be defined:
 *         o ReplaceTinyPivot (yes_no_t)
 *           Specifies whether to replace the tiny diagonals by
 *           sqrt(epsilon)*norm(A) during LU factorization.
 *
 * m      (input) int
 *        Number of rows in the matrix.
 *
 * n      (input) int
 *        Number of columns in the matrix.
 *
 * anorm  (input) double
 *        The norm of the original matrix A, or the scaled A if
 *        equilibration was done.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         The following fields should be defined:
 *
 *         o Glu_persist (input) Glu_persist_t*
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *	       xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (input/output) LocalLU_t*
 *           The distributed data structures to store L and U factors.
 *           See superlu_ddefs.h for the definition of 'LocalLU_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ void pdgstrf /************************************************************************/ ( superlu_options_t *options, int m, int n, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, int *info ) { #ifdef _CRAY _fcd ftcs = _cptofcd("N", strlen("N")); _fcd ftcs1 = _cptofcd("L", strlen("L")); _fcd ftcs2 = _cptofcd("N", strlen("N")); _fcd ftcs3 = _cptofcd("U", strlen("U")); #endif double alpha = 1.0, beta = 0.0; int_t *xsup; int_t *lsub, *lsub1, *usub, *Usub_buf, *Lsub_buf_2[2]; /* Need 2 buffers to implement Irecv. */ double *lusup, *lusup1, *uval, *Uval_buf, *Lval_buf_2[2]; /* Need 2 buffers to implement Irecv. */ int_t fnz, i, ib, ijb, ilst, it, iukp, jb, jj, klst, knsupc, lb, lib, ldv, ljb, lptr, lptr0, lptrj, luptr, luptr0, luptrj, nlb, nub, nsupc, rel, rukp; int_t Pc, Pr; int iam, kcol, krow, mycol, myrow, pi, pj; int j, k, lk, nsupers; int nsupr, nbrow, segsize; int msgcnt[4]; /* Count the size of the message xfer'd in each buffer: * 0 : transferred in Lsub_buf[] * 1 : transferred in Lval_buf[] * 2 : transferred in Usub_buf[] * 3 : transferred in Uval_buf[] */ int_t msg0, msg2; int_t **Ufstnz_br_ptr, **Lrowind_bc_ptr; double **Unzval_br_ptr, **Lnzval_bc_ptr; int_t *index; double *nzval; int_t *iuip, *ruip;/* Pointers to U index/nzval; size ceil(NSUPERS/Pr). */ double *ucol; int_t *indirect; double *tempv, *tempv2d; int_t iinfo; int_t *ToRecv, *ToSendD, **ToSendR; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; superlu_scope_t *scp; double s_eps, thresh; double *tempU2d, *tempu; int full, ldt, ldu, lead_zero, ncols; MPI_Request recv_req[4], *send_req; MPI_Status status; #ifdef CRAY_X1 int nonzero_segs; #endif #if ( DEBUGlevel>=2 ) int_t num_copy=0, num_update=0; #endif #if ( PRNTlevel==3 ) int_t zero_msg = 0, total_msg = 0; #endif #if ( PROFlevel>=1 ) double t1, t2; float msg_vol = 0, msg_cnt = 0; int_t iword = sizeof(int_t), dword = sizeof(double); #endif /* Test the input parameters. 
*/ *info = 0; if ( m < 0 ) *info = -2; else if ( n < 0 ) *info = -3; if ( *info ) { pxerbla("pdgstrf", grid, -*info); return; } /* Quick return if possible. */ if ( m == 0 || n == 0 ) return; /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; s_eps = slamch_("Epsilon"); thresh = s_eps * anorm; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrf()"); #endif stat->ops[FACT] = 0.0; if ( Pr*Pc > 1 ) { i = Llu->bufmax[0]; if ( !(Llu->Lsub_buf_2[0] = intMalloc_dist(2 * ((size_t)i))) ) ABORT("Malloc fails for Lsub_buf."); Llu->Lsub_buf_2[1] = Llu->Lsub_buf_2[0] + i; i = Llu->bufmax[1]; if ( !(Llu->Lval_buf_2[0] = doubleMalloc_dist(2 * ((size_t)i))) ) ABORT("Malloc fails for Lval_buf[]."); Llu->Lval_buf_2[1] = Llu->Lval_buf_2[0] + i; if ( Llu->bufmax[2] != 0 ) if ( !(Llu->Usub_buf = intMalloc_dist(Llu->bufmax[2])) ) ABORT("Malloc fails for Usub_buf[]."); if ( Llu->bufmax[3] != 0 ) if ( !(Llu->Uval_buf = doubleMalloc_dist(Llu->bufmax[3])) ) ABORT("Malloc fails for Uval_buf[]."); if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*Pc*sizeof(MPI_Request)))) ABORT("Malloc fails for send_req[]."); } if ( !(Llu->ujrow = doubleMalloc_dist(sp_ienv_dist(3))) ) ABORT("Malloc fails for ujrow[]."); #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. thresh = s_eps %e * anorm %e = %e\n", s_eps, anorm, thresh); printf(".. 
Buffer size: Lsub %d\tLval %d\tUsub %d\tUval %d\tLDA %d\n", Llu->bufmax[0], Llu->bufmax[1], Llu->bufmax[2], Llu->bufmax[3], Llu->bufmax[4]); } #endif Lsub_buf_2[0] = Llu->Lsub_buf_2[0]; Lsub_buf_2[1] = Llu->Lsub_buf_2[1]; Lval_buf_2[0] = Llu->Lval_buf_2[0]; Lval_buf_2[1] = Llu->Lval_buf_2[1]; Usub_buf = Llu->Usub_buf; Uval_buf = Llu->Uval_buf; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; ToRecv = Llu->ToRecv; ToSendD = Llu->ToSendD; ToSendR = Llu->ToSendR; ldt = sp_ienv_dist(3); /* Size of maximum supernode */ if ( !(tempv2d = doubleCalloc_dist(2*((size_t)ldt)*ldt)) ) ABORT("Calloc fails for tempv2d[]."); tempU2d = tempv2d + ldt*ldt; #ifdef CRAY_X1 if ( !(indirect = intMalloc_dist(2*ldt)) ) ABORT("Malloc fails for indirect[]."); #else if ( !(indirect = intMalloc_dist(ldt)) ) ABORT("Malloc fails for indirect[]."); #endif k = CEILING( nsupers, Pr ); /* Number of local block rows */ if ( !(iuip = intMalloc_dist(k)) ) ABORT("Malloc fails for iuip[]."); if ( !(ruip = intMalloc_dist(k)) ) ABORT("Malloc fails for ruip[]."); #if ( VAMPIR>=1 ) VT_symdef(1, "Send-L", "Comm"); VT_symdef(2, "Recv-L", "Comm"); VT_symdef(3, "Send-U", "Comm"); VT_symdef(4, "Recv-U", "Comm"); VT_symdef(5, "TRF2", "Factor"); VT_symdef(100, "Factor", "Factor"); VT_begin(100); VT_traceon(); #endif /* --------------------------------------------------------------- Handle the first block column separately to start the pipeline. --------------------------------------------------------------- */ if ( mycol == 0 ) { #if ( VAMPIR>=1 ) VT_begin(5); #endif pdgstrf2(options, 0, thresh, Glu_persist, grid, Llu, stat, info); #if ( VAMPIR>=1 ) VT_end(5); #endif scp = &grid->rscp; /* The scope of process row. */ /* Process column *kcol* multicasts numeric values of L(:,k) to process rows. 
*/ lsub = Lrowind_bc_ptr[0]; lusup = Lnzval_bc_ptr[0]; if ( lsub ) { msgcnt[0] = lsub[1] + BC_HEADER + lsub[0]*LB_DESCRIPTOR; msgcnt[1] = lsub[1] * SuperSize( 0 ); } else { msgcnt[0] = msgcnt[1] = 0; } for (pj = 0; pj < Pc; ++pj) { if ( ToSendR[0][pj] != EMPTY ) { #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(1); #endif MPI_Isend( lsub, msgcnt[0], mpi_int_t, pj, 0, scp->comm, &send_req[pj] ); MPI_Isend( lusup, msgcnt[1], MPI_DOUBLE, pj, 1, scp->comm, &send_req[pj+Pc] ); #if ( DEBUGlevel>=2 ) printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, 0, msgcnt[0], msgcnt[1], pj); #endif #if ( VAMPIR>=1 ) VT_end(1); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[0]*iword + msgcnt[1]*dword; #endif } } /* for pj ... */ } else { /* Post immediate receives. */ if ( ToRecv[0] >= 1 ) { /* Recv block column L(:,0). */ scp = &grid->rscp; /* The scope of process row. */ MPI_Irecv( Lsub_buf_2[0], Llu->bufmax[0], mpi_int_t, 0, 0, scp->comm, &recv_req[0] ); MPI_Irecv( Lval_buf_2[0], Llu->bufmax[1], MPI_DOUBLE, 0, 1, scp->comm, &recv_req[1] ); #if ( DEBUGlevel>=2 ) printf("(%d) Post Irecv L(:,%4d)\n", iam, 0); #endif } } /* if mycol == 0 */ /* ------------------------------------------ MAIN LOOP: Loop through all block columns. ------------------------------------------ */ for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( mycol == kcol ) { lk = LBj( k, grid ); /* Local block number. */ for (pj = 0; pj < Pc; ++pj) { /* Wait for Isend to complete before using lsub/lusup. */ if ( ToSendR[lk][pj] != EMPTY ) { MPI_Wait( &send_req[pj], &status ); MPI_Wait( &send_req[pj+Pc], &status ); } } lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; } else { if ( ToRecv[k] >= 1 ) { /* Recv block column L(:,k). */ scp = &grid->rscp; /* The scope of process row. 
*/ #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(2); #endif /*probe_recv(iam, kcol, (4*k)%NTAGS, mpi_int_t, scp->comm, Llu->bufmax[0]);*/ /*MPI_Recv( Lsub_buf, Llu->bufmax[0], mpi_int_t, kcol, (4*k)%NTAGS, scp->comm, &status );*/ MPI_Wait( &recv_req[0], &status ); MPI_Get_count( &status, mpi_int_t, &msgcnt[0] ); /*probe_recv(iam, kcol, (4*k+1)%NTAGS, MPI_DOUBLE, scp->comm, Llu->bufmax[1]);*/ /*MPI_Recv( Lval_buf, Llu->bufmax[1], MPI_DOUBLE, kcol, (4*k+1)%NTAGS, scp->comm, &status );*/ MPI_Wait( &recv_req[1], &status ); MPI_Get_count( &status, MPI_DOUBLE, &msgcnt[1] ); #if ( VAMPIR>=1 ) VT_end(2); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Recv L(:,%4d): lsub %4d, lusup %4d from Pc %2d\n", iam, k, msgcnt[0], msgcnt[1], kcol); fflush(stdout); #endif lsub = Lsub_buf_2[k%2]; lusup = Lval_buf_2[k%2]; #if ( PRNTlevel==3 ) ++total_msg; if ( !msgcnt[0] ) ++zero_msg; #endif } else msgcnt[0] = 0; } /* if mycol = Pc(k) */ scp = &grid->cscp; /* The scope of process column. */ if ( myrow == krow ) { /* Parallel triangular solve across process row *krow* -- U(k,j) = L(k,k) \ A(k,j). */ #ifdef _CRAY pdgstrs2(n, k, Glu_persist, grid, Llu, stat, ftcs1, ftcs2, ftcs3); #else pdgstrs2(n, k, Glu_persist, grid, Llu, stat); #endif /* Multicasts U(k,:) to process columns. 
*/ lk = LBi( k, grid ); usub = Ufstnz_br_ptr[lk]; uval = Unzval_br_ptr[lk]; if ( usub ) { msgcnt[2] = usub[2]; msgcnt[3] = usub[1]; } else { msgcnt[2] = msgcnt[3] = 0; } if ( ToSendD[lk] == YES ) { for (pi = 0; pi < Pr; ++pi) { if ( pi != myrow ) { #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(3); #endif MPI_Send( usub, msgcnt[2], mpi_int_t, pi, (4*k+2)%NTAGS, scp->comm); MPI_Send( uval, msgcnt[3], MPI_DOUBLE, pi, (4*k+3)%NTAGS, scp->comm); #if ( VAMPIR>=1 ) VT_end(3); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[2]*iword + msgcnt[3]*dword; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Send U(%4d,:) to Pr %2d\n", iam, k, pi); #endif } /* if pi ... */ } /* for pi ... */ } /* if ToSendD ... */ } else { /* myrow != krow */ if ( ToRecv[k] == 2 ) { /* Recv block row U(k,:). */ #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(4); #endif /*probe_recv(iam, krow, (4*k+2)%NTAGS, mpi_int_t, scp->comm, Llu->bufmax[2]);*/ MPI_Recv( Usub_buf, Llu->bufmax[2], mpi_int_t, krow, (4*k+2)%NTAGS, scp->comm, &status ); MPI_Get_count( &status, mpi_int_t, &msgcnt[2] ); /*probe_recv(iam, krow, (4*k+3)%NTAGS, MPI_DOUBLE, scp->comm, Llu->bufmax[3]);*/ MPI_Recv( Uval_buf, Llu->bufmax[3], MPI_DOUBLE, krow, (4*k+3)%NTAGS, scp->comm, &status ); MPI_Get_count( &status, MPI_DOUBLE, &msgcnt[3] ); #if ( VAMPIR>=1 ) VT_end(4); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; #endif usub = Usub_buf; uval = Uval_buf; #if ( DEBUGlevel>=2 ) printf("(%d) Recv U(%4d,:) from Pr %2d\n", iam, k, krow); #endif #if ( PRNTlevel==3 ) ++total_msg; if ( !msgcnt[2] ) ++zero_msg; #endif } else msgcnt[2] = 0; } /* if myrow == Pr(k) */ /* * Parallel rank-k update; pair up blocks L(i,k) and U(k,j). 
* for (j = k+1; k < N; ++k) { * for (i = k+1; i < N; ++i) * if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid ) * && L(i,k) != 0 && U(k,j) != 0 ) * A(i,j) = A(i,j) - L(i,k) * U(k,j); */ msg0 = msgcnt[0]; msg2 = msgcnt[2]; if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */ nsupr = lsub[1]; /* LDA of lusup. */ if ( myrow == krow ) { /* Skip diagonal block L(k,k). */ lptr0 = BC_HEADER + LB_DESCRIPTOR + lsub[BC_HEADER+1]; luptr0 = knsupc; nlb = lsub[0] - 1; } else { lptr0 = BC_HEADER; luptr0 = 0; nlb = lsub[0]; } lptr = lptr0; for (lb = 0; lb < nlb; ++lb) { /* Initialize block row pointers. */ ib = lsub[lptr]; lib = LBi( ib, grid ); iuip[lib] = BR_HEADER; ruip[lib] = 0; lptr += LB_DESCRIPTOR + lsub[lptr+1]; } nub = usub[0]; /* Number of blocks in the block row U(k,:) */ iukp = BR_HEADER; /* Skip header; Pointer to index[] of U(k,:) */ rukp = 0; /* Pointer to nzval[] of U(k,:) */ klst = FstBlockC( k+1 ); /* --------------------------------------------------- Update the first block column A(:,k+1). --------------------------------------------------- */ jb = usub[iukp]; /* Global block number of block U(k,j). */ if ( jb == k+1 ) { /* First update (k+1)-th block. */ --nub; lptr = lptr0; luptr = luptr0; ljb = LBj( jb, grid ); /* Local block number of U(k,j). */ nsupc = SuperSize( jb ); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ /* Prepare to call DGEMM. */ jj = iukp; while ( usub[jj] == klst ) ++jj; ldu = klst - usub[jj++]; ncols = 1; full = 1; for (; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { ++ncols; if ( segsize != ldu ) full = 0; if ( segsize > ldu ) ldu = segsize; } } #if ( DEBUGlevel>=3 ) ++num_update; #endif if ( full ) { tempu = &uval[rukp]; } else { /* Copy block U(k,j) into tempU2d. 
*/ #if ( DEBUGlevel>=3 ) printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n", iam, full, k, jb, ldu, ncols, nsupc); ++num_copy; #endif tempu = tempU2d; for (jj = iukp; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { lead_zero = ldu - segsize; for (i = 0; i < lead_zero; ++i) tempu[i] = 0.0; tempu += lead_zero; for (i = 0; i < segsize; ++i) tempu[i] = uval[rukp+i]; rukp += segsize; tempu += segsize; } } tempu = tempU2d; rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */ } /* if full ... */ for (lb = 0; lb < nlb; ++lb) { ib = lsub[lptr]; /* Row block L(i,k). */ nbrow = lsub[lptr+1]; /* Number of full rows. */ lptr += LB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; #ifdef _CRAY SGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #else dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #endif stat->ops[FACT] += 2 * nbrow * ldu * ncols; /* Now gather the result into the destination block. */ if ( ib < jb ) { /* A(i,j) is in U. */ ilst = FstBlockC( ib+1 ); lib = LBi( ib, grid ); index = Ufstnz_br_ptr[lib]; ijb = index[iuip[lib]]; while ( ijb < jb ) { /* Search for dest block. */ ruip[lib] += index[iuip[lib]+1]; iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb ); ijb = index[iuip[lib]]; } iuip[lib] += UB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0, it = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; ucol[rel] -= tempv[it++]; } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. */ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! 
*/ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted. */ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) { /*#pragma _CRI cache_bypass nzval,tempv*/ for (it = 0, i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; nzval[indirect[rel]] -= tempv[it++]; } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* if jb == k+1 */ } /* if L(:,k) and U(k,:) not empty */ if ( k+1 < nsupers ) { kcol = PCOL( k+1, grid ); if ( mycol == kcol ) { #if ( VAMPIR>=1 ) VT_begin(5); #endif /* Factor diagonal and subdiagonal blocks and test for exact singularity. */ pdgstrf2(options, k+1, thresh, Glu_persist, grid, Llu, stat, info); #if ( VAMPIR>=1 ) VT_end(5); #endif /* Process column *kcol+1* multicasts numeric values of L(:,k+1) to process rows. */ lk = LBj( k+1, grid ); /* Local block number. */ lsub1 = Lrowind_bc_ptr[lk]; if ( lsub1 ) { msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0]*LB_DESCRIPTOR; msgcnt[1] = lsub1[1] * SuperSize( k+1 ); } else { msgcnt[0] = 0; msgcnt[1] = 0; } scp = &grid->rscp; /* The scope of process row. 
*/ for (pj = 0; pj < Pc; ++pj) { if ( ToSendR[lk][pj] != EMPTY ) { lusup1 = Lnzval_bc_ptr[lk]; #if ( PROFlevel>=1 ) TIC(t1); #endif #if ( VAMPIR>=1 ) VT_begin(1); #endif MPI_Isend( lsub1, msgcnt[0], mpi_int_t, pj, (4*(k+1))%NTAGS, scp->comm, &send_req[pj] ); MPI_Isend( lusup1, msgcnt[1], MPI_DOUBLE, pj, (4*(k+1)+1)%NTAGS, scp->comm, &send_req[pj+Pc] ); #if ( VAMPIR>=1 ) VT_end(1); #endif #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; msg_cnt += 2; msg_vol += msgcnt[0]*iword + msgcnt[1]*dword; #endif #if ( DEBUGlevel>=2 ) printf("(%d) Send L(:,%4d): lsub %4d, lusup %4d to Pc %2d\n", iam, k+1, msgcnt[0], msgcnt[1], pj); #endif } } /* for pj ... */ } else { /* Post Recv of block column L(:,k+1). */ if ( ToRecv[k+1] >= 1 ) { scp = &grid->rscp; /* The scope of process row. */ MPI_Irecv(Lsub_buf_2[(k+1)%2], Llu->bufmax[0], mpi_int_t, kcol, (4*(k+1))%NTAGS, scp->comm, &recv_req[0]); MPI_Irecv(Lval_buf_2[(k+1)%2], Llu->bufmax[1], MPI_DOUBLE, kcol, (4*(k+1)+1)%NTAGS, scp->comm, &recv_req[1]); #if ( DEBUGlevel>=2 ) printf("(%d) Post Irecv L(:,%4d)\n", iam, k+1); #endif } } /* if mycol == Pc(k+1) */ } /* if k+1 < nsupers */ if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */ /* --------------------------------------------------- Update all other blocks using block row U(k,:) --------------------------------------------------- */ for (j = 0; j < nub; ++j) { lptr = lptr0; luptr = luptr0; jb = usub[iukp]; /* Global block number of block U(k,j). */ ljb = LBj( jb, grid ); /* Local block number of U(k,j). */ nsupc = SuperSize( jb ); iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */ /* Prepare to call DGEMM. 
*/ jj = iukp; while ( usub[jj] == klst ) ++jj; ldu = klst - usub[jj++]; ncols = 1; full = 1; for (; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { ++ncols; if ( segsize != ldu ) full = 0; if ( segsize > ldu ) ldu = segsize; } } #if ( DEBUGlevel>=3 ) printf("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n", iam, full, k, jb, ldu, ncols, nsupc); ++num_update; #endif if ( full ) { tempu = &uval[rukp]; } else { /* Copy block U(k,j) into tempU2d. */ #if ( DEBUGlevel>=3 ) ++num_copy; #endif tempu = tempU2d; for (jj = iukp; jj < iukp+nsupc; ++jj) { segsize = klst - usub[jj]; if ( segsize ) { lead_zero = ldu - segsize; for (i = 0; i < lead_zero; ++i) tempu[i] = 0.0; tempu += lead_zero; for (i = 0; i < segsize; ++i) tempu[i] = uval[rukp+i]; rukp += segsize; tempu += segsize; } } tempu = tempU2d; rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */ } /* if full ... */ for (lb = 0; lb < nlb; ++lb) { ib = lsub[lptr]; /* Row block L(i,k). */ nbrow = lsub[lptr+1]; /* Number of full rows. */ lptr += LB_DESCRIPTOR; /* Skip descriptor. */ tempv = tempv2d; #ifdef _CRAY SGEMM(ftcs, ftcs, &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #else dgemm_("N", "N", &nbrow, &ncols, &ldu, &alpha, &lusup[luptr+(knsupc-ldu)*nsupr], &nsupr, tempu, &ldu, &beta, tempv, &ldt); #endif stat->ops[FACT] += 2 * nbrow * ldu * ncols; /* Now gather the result into the destination block. */ if ( ib < jb ) { /* A(i,j) is in U. */ ilst = FstBlockC( ib+1 ); lib = LBi( ib, grid ); index = Ufstnz_br_ptr[lib]; ijb = index[iuip[lib]]; while ( ijb < jb ) { /* Search for dest block. */ ruip[lib] += index[iuip[lib]+1]; iuip[lib] += UB_DESCRIPTOR + SuperSize( ijb ); ijb = index[iuip[lib]]; } /* Skip descriptor. Now point to fstnz index of block U(i,j). 
*/ iuip[lib] += UB_DESCRIPTOR; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; fnz = index[iuip[lib]++]; if ( segsize ) { /* Nonzero segment in U(k.j). */ ucol = &Unzval_br_ptr[lib][ruip[lib]]; for (i = 0 ; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; ucol[rel] -= tempv[i]; } tempv += ldt; } ruip[lib] += ilst - fnz; } } else { /* A(i,j) is in L. */ index = Lrowind_bc_ptr[ljb]; ldv = index[1]; /* LDA of the dest lusup. */ lptrj = BC_HEADER; luptrj = 0; ijb = index[lptrj]; while ( ijb != ib ) { /* Search for dest block -- blocks are not ordered! */ luptrj += index[lptrj+1]; lptrj += LB_DESCRIPTOR + index[lptrj+1]; ijb = index[lptrj]; } /* * Build indirect table. This is needed because the * indices are not sorted for the L blocks. */ fnz = FstBlockC( ib ); lptrj += LB_DESCRIPTOR; for (i = 0; i < index[lptrj-1]; ++i) { rel = index[lptrj + i] - fnz; indirect[rel] = i; } nzval = Lnzval_bc_ptr[ljb] + luptrj; tempv = tempv2d; for (jj = 0; jj < nsupc; ++jj) { segsize = klst - usub[iukp + jj]; if ( segsize ) { /*#pragma _CRI cache_bypass nzval,tempv*/ for (i = 0; i < nbrow; ++i) { rel = lsub[lptr + i] - fnz; nzval[indirect[rel]] -= tempv[i]; } tempv += ldt; } nzval += ldv; } } /* if ib < jb ... */ lptr += nbrow; luptr += nbrow; } /* for lb ... */ rukp += usub[iukp - 1]; /* Move to block U(k,j+1) */ iukp += nsupc; } /* for j ... */ } /* if k L(:,k) and U(k,:) are not empty */ } /* ------------------------------------------ END MAIN LOOP: for k = ... 
------------------------------------------ */ #if ( VAMPIR>=1 ) VT_end(100); VT_traceoff(); #endif if ( Pr*Pc > 1 ) { SUPERLU_FREE(Lsub_buf_2[0]); /* also free Lsub_buf_2[1] */ SUPERLU_FREE(Lval_buf_2[0]); /* also free Lval_buf_2[1] */ if ( Llu->bufmax[2] != 0 ) SUPERLU_FREE(Usub_buf); if ( Llu->bufmax[3] != 0 ) SUPERLU_FREE(Uval_buf); SUPERLU_FREE(send_req); } SUPERLU_FREE(Llu->ujrow); SUPERLU_FREE(tempv2d); SUPERLU_FREE(indirect); SUPERLU_FREE(iuip); SUPERLU_FREE(ruip); /* Prepare error message. */ if ( *info == 0 ) *info = n + 1; #if ( PROFlevel>=1 ) TIC(t1); #endif MPI_Allreduce( info, &iinfo, 1, mpi_int_t, MPI_MIN, grid->comm ); #if ( PROFlevel>=1 ) TOC(t2, t1); stat->utime[COMM] += t2; { float msg_vol_max, msg_vol_sum, msg_cnt_max, msg_cnt_sum; MPI_Reduce( &msg_cnt, &msg_cnt_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_cnt, &msg_cnt_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_sum, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &msg_vol, &msg_vol_max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); if ( !iam ) { printf("\tPDGSTRF comm stat:" "\tAvg\tMax\t\tAvg\tMax\n" "\t\t\tCount:\t%.0f\t%.0f\tVol(MB)\t%.2f\t%.2f\n", msg_cnt_sum/Pr/Pc, msg_cnt_max, msg_vol_sum/Pr/Pc*1e-6, msg_vol_max*1e-6); } } #endif if ( iinfo == n + 1 ) *info = 0; else *info = iinfo; #if ( PRNTlevel==3 ) MPI_Allreduce( &zero_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. # msg of zero size\t%d\n", iinfo); MPI_Allreduce( &total_msg, &iinfo, 1, mpi_int_t, MPI_SUM, grid->comm ); if ( !iam ) printf(".. 
# total msg\t%d\n", iinfo); #endif #if ( PRNTlevel==2 ) for (i = 0; i < Pr * Pc; ++i) { if ( iam == i ) { dPrintLblocks(iam, nsupers, grid, Glu_persist, Llu); dPrintUblocks(iam, nsupers, grid, Glu_persist, Llu); printf("(%d)\n", iam); PrintInt10("Recv", nsupers, Llu->ToRecv); } MPI_Barrier( grid->comm ); } #endif #if ( DEBUGlevel>=3 ) printf("(%d) num_copy=%d, num_update=%d\n", iam, num_copy, num_update); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrf()"); #endif } /* PDGSTRF */ /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   Factor diagonal and subdiagonal blocks and test for exact singularity.
 *   Only the process column that owns block column *k* participates
 *   in the work.
 * 
 * Arguments
 * =========
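 *
 * options (input) superlu_options_t*
 *        Only the ReplaceTinyPivot field is read by this routine. When it is
 *        YES, a diagonal entry whose magnitude is below thresh is replaced
 *        by +/- thresh (keeping its sign). An exactly zero diagonal entry is
 *        tested against thresh regardless of this option.
 *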
 * k      (input) int (global)
 *        The column number of the block column to be factorized.
 *
 * thresh (input) double (global)
 *        The threshold value = s_eps * anorm.
 *
 * Glu_persist (input) Glu_persist_t*
 *        Global data structures (xsup, supno) replicated on all processes.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Llu    (input/output) LocalLU_t*
 *        Local data structures to store distributed L and U matrices.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the factorization.
 *        See SuperLUStat_t structure defined in util.h.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ static void pdgstrf2 /************************************************************************/ ( superlu_options_t *options, int_t k, double thresh, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, int* info ) { int c, iam, l, pkk; int incx = 1, incy = 1; int nsupr; /* number of rows in the block (LDA) */ int luptr; int_t i, krow, j, jfst, jlst; int_t nsupc; /* number of columns in the block */ int_t *xsup = Glu_persist->xsup; double *lusup, temp; double *ujrow; double alpha = -1; *info = 0; /* Quick return. */ /* Initialization. */ iam = grid->iam; krow = PROW( k, grid ); pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); j = LBj( k, grid ); /* Local block number */ jfst = FstBlockC( k ); jlst = FstBlockC( k+1 ); lusup = Llu->Lnzval_bc_ptr[j]; nsupc = SuperSize( k ); if ( Llu->Lrowind_bc_ptr[j] ) nsupr = Llu->Lrowind_bc_ptr[j][1]; ujrow = Llu->ujrow; luptr = 0; /* Point to the diagonal entries. */ c = nsupc; for (j = 0; j < jlst - jfst; ++j) { /* Broadcast the j-th row (nsupc - j) elements to the process column. */ if ( iam == pkk ) { /* Diagonal process. */ i = luptr; if ( options->ReplaceTinyPivot == YES || lusup[i] == 0.0 ) { if ( fabs(lusup[i]) < thresh ) { /* Diagonal */ #if ( PRNTlevel>=2 ) printf("(%d) .. col %d, tiny pivot %e ", iam, jfst+j, lusup[i]); #endif /* Keep the replaced diagonal with the same sign. 
*/ if ( lusup[i] < 0 ) lusup[i] = -thresh; else lusup[i] = thresh; #if ( PRNTlevel>=2 ) printf("replaced by %e\n", lusup[i]); #endif ++(stat->TinyPivots); } } for (l = 0; l < c; ++l, i += nsupr) ujrow[l] = lusup[i]; } #if 0 dbcast_col(ujrow, c, pkk, UjROW, grid, &c); #else MPI_Bcast(ujrow, c, MPI_DOUBLE, krow, (grid->cscp).comm); /*bcast_tree(ujrow, c, MPI_DOUBLE, krow, (24*k+j)%NTAGS, grid, COMM_COLUMN, &c);*/ #endif #if ( DEBUGlevel>=2 ) if ( k == 3329 && j == 2 ) { if ( iam == pkk ) { printf("..(%d) k %d, j %d: Send ujrow[0] %e\n",iam,k,j,ujrow[0]); } else { printf("..(%d) k %d, j %d: Recv ujrow[0] %e\n",iam,k,j,ujrow[0]); } } #endif if ( !lusup ) { /* Empty block column. */ --c; if ( ujrow[0] == 0.0 ) *info = j+jfst+1; continue; } /* Test for singularity. */ if ( ujrow[0] == 0.0 ) { *info = j+jfst+1; } else { /* Scale the j-th column of the matrix. */ temp = 1.0 / ujrow[0]; if ( iam == pkk ) { for (i = luptr+1; i < luptr-j+nsupr; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupr-j-1; } else { for (i = luptr; i < luptr+nsupr; ++i) lusup[i] *= temp; stat->ops[FACT] += nsupr; } } /* Rank-1 update of the trailing submatrix. */ if ( --c ) { if ( iam == pkk ) { l = nsupr - j - 1; #ifdef _CRAY SGER(&l, &c, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr); #else dger_(&l, &c, &alpha, &lusup[luptr+1], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr+1], &nsupr); #endif stat->ops[FACT] += 2 * l * c; } else { #ifdef _CRAY SGER(&nsupr, &c, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr); #else dger_(&nsupr, &c, &alpha, &lusup[luptr], &incx, &ujrow[1], &incy, &lusup[luptr+nsupr], &nsupr); #endif stat->ops[FACT] += 2 * nsupr * c; } } /* Move to the next column. */ if ( iam == pkk ) luptr += nsupr + 1; else luptr += nsupr; } /* for j ... */ } /* PDGSTRF2 */ /************************************************************************/ /*! \brief * *
 
 * Purpose
 * =======
 *   Perform parallel triangular solves
 *           U(k,:) := L(k,k) \ A(k,:).
 *   Only the process column that owns block column *k* participates
 *   in the work.
 * 
 * Arguments
 * =========
 *
 * m      (input) int (global)
 *        Number of rows in the matrix.
 *
 * k      (input) int (global)
 *        The row number of the block row to be factorized.
 *
 * Glu_persist (input) Glu_persist_t*
 *        Global data structures (xsup, supno) replicated on all processes.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Llu    (input/output) LocalLU_t*
 *        Local data structures to store distributed L and U matrices.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the factorization; 
 *        See SuperLUStat_t structure defined in util.h.
 * 
*/ static void pdgstrs2 /************************************************************************/ #ifdef _CRAY ( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3 ) #else ( int_t m, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, LocalLU_t *Llu, SuperLUStat_t *stat ) #endif { int iam, pkk; int incx = 1; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int segsize; int_t nsupc; /* number of columns in the block */ int_t luptr, iukp, rukp; int_t b, gb, j, klst, knsupc, lk, nb; int_t *xsup = Glu_persist->xsup; int_t *usub; double *lusup, *uval; /* Quick return. */ lk = LBi( k, grid ); /* Local block number */ if ( !Llu->Unzval_br_ptr[lk] ) return; /* Initialization. */ iam = grid->iam; pkk = PNUM( PROW(k, grid), PCOL(k, grid), grid ); klst = FstBlockC( k+1 ); knsupc = SuperSize( k ); usub = Llu->Ufstnz_br_ptr[lk]; /* index[] of block row U(k,:) */ uval = Llu->Unzval_br_ptr[lk]; nb = usub[0]; iukp = BR_HEADER; rukp = 0; if ( iam == pkk ) { lk = LBj( k, grid ); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; } else { nsupr = Llu->Lsub_buf_2[k%2][1]; /* LDA of lusup[] */ lusup = Llu->Lval_buf_2[k%2]; } /* Loop through all the row blocks. */ for (b = 0; b < nb; ++b) { gb = usub[iukp]; nsupc = SuperSize( gb ); iukp += UB_DESCRIPTOR; /* Loop through all the segments in the block. */ for (j = 0; j < nsupc; ++j) { segsize = klst - usub[iukp++]; if ( segsize ) { /* Nonzero segment. */ luptr = (knsupc - segsize) * (nsupr + 1); #ifdef _CRAY STRSV(ftcs1, ftcs2, ftcs3, &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #else dtrsv_("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #endif stat->ops[FACT] += segsize * (segsize + 1); rukp += segsize; } } } /* for b ... 
*/ } /* PDGSTRS2 */ static int probe_recv(int iam, int source, int tag, MPI_Datatype datatype, MPI_Comm comm, int buf_size) { MPI_Status status; int count; MPI_Probe( source, tag, comm, &status ); MPI_Get_count( &status, datatype, &count ); if ( count > buf_size ) { printf("(%d) Recv'ed count %d > buffer size $d\n", iam, count, buf_size); exit(-1); } return 0; } SuperLU_DIST_5.3.0/SRC/zdistribute_mark.c0000644013363400111340000005770613233431301017030 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Distribute the matrix onto the 2D process mesh * *
 * NOTE zdistribute_mark.c
 * ====
 * This version is faster for Mark Baertschy's matrices, remains to be
 * tested for the other matrices.
 *
 * Main difference: there is no dense SPA involved when distributing A into
 * the U structure. That is, the entries in upper triangle of A are loaded
 * directly into U.
 * 
 * The locations of modifications have XSL comments.
 *
 * Date: Apr 23 09:54:15 PDT 2001
 * 
*/ #include "superlu_zdefs.h" /*! \brief * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 *
 * Purpose
 * =======
 *   Distribute the matrix onto the 2D process mesh.
 * 
 * Arguments
 * =========
 * 
 * fact (input) fact_t
 *        Specifies whether or not the L and U structures will be re-used.
 *        = SamePattern_SameRowPerm: L and U structures are input, and
 *                                   unchanged on exit.
 *        = DOFACT or SamePattern: L and U structures are computed and output.
 *
 * n      (input) int
 *        Dimension of the matrix.
 *
 * A      (input) SuperMatrix*
 *	  The original matrix A, permuted by columns, of dimension
 *        (A->nrow, A->ncol). The type of A can be:
 *        Stype = NCP; Dtype = Z; Mtype = GE.
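 *
 * Glu_freeable (input) Glu_freeable_t*
 *        Holds the compressed L and U subscript arrays (lsub, xlsub, usub,
 *        xusub). They are read in the code path taken when fact is not
 *        SamePattern_SameRowPerm, to set up the L and U data structures.
 *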
 * LUstruct (input) LUstruct_t*
 *        Data structures for L and U factors.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 * 
*/ int_t zdistribute(fact_t fact, int_t n, SuperMatrix *A, Glu_freeable_t *Glu_freeable, LUstruct_t *LUstruct, gridinfo_t *grid) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; int_t bnnz, fsupc, i, irow, istart, j, jb, jj, k, len, len1, nsupc; int_t ljb; /* local block column number */ int_t nrbl; /* number of L blocks in current block column */ int_t nrbu; /* number of U blocks in current block column */ int_t gb; /* global block number; 0 < gb <= nsuper */ int_t lb; /* local block number; 0 < lb <= ceil(NSUPERS/Pr) */ int iam, jbrow, kcol, mycol, myrow, pc, pr; int_t mybufmax[NBUFFERS]; NCPformat *Astore; doublecomplex *a; int_t *asub; int_t *xa_begin, *xa_end; int_t *xsup = Glu_persist->xsup; /* supernode and column mapping */ int_t *supno = Glu_persist->supno; int_t *lsub, *xlsub, *usub, *xusub; int_t nsupers; int_t next_lind; /* next available position in index[*] */ int_t next_lval; /* next available position in nzval[*] */ int_t *index; /* indices consist of headers and row subscripts */ doublecomplex *lusup, *uval; /* nonzero values in L and U */ doublecomplex **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc) */ int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc) */ doublecomplex **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */ int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */ /*-- Counts to be used in factorization. --*/ int_t *ToRecv, *ToSendD, **ToSendR; /*-- Counts to be used in lower triangular solve. --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist; /* Column process list to send down Xk. */ int_t nfrecvx = 0; /* Number of Xk I will receive. */ int_t kseen; /*-- Counts to be used in upper triangular solve. --*/ int_t *bmod; /* Modification count for U-solve. */ int_t **bsendx_plist; /* Column process list to send down Xk. */ int_t nbrecvx = 0; /* Number of Xk I will receive. 
*/ int_t *ilsum; /* starting position of each supernode in the full array (local) */ /*-- Auxiliary arrays; freed on return --*/ int_t *rb_marker; /* block hit marker; size ceil(NSUPERS/Pr) */ int_t *Urb_length; /* U block length; size ceil(NSUPERS/Pr) */ int_t *Urb_indptr; /* pointers to U index[]; size ceil(NSUPERS/Pr) */ int_t *Urb_fstnz; /* # of fstnz in a block row; size ceil(NSUPERS/Pr) */ int_t *Ucbs; /* number of column blocks in a block row */ int_t *Lrb_length; /* L block length; size ceil(NSUPERS/Pr) */ int_t *Lrb_number; /* global block number; size ceil(NSUPERS/Pr) */ int_t *Lrb_indptr; /* pointers to L index[]; size ceil(NSUPERS/Pr) */ int_t *Lrb_valptr; /* pointers to L nzval[]; size ceil(NSUPERS/Pr) */ doublecomplex *dense, *dense_col; /* SPA */ doublecomplex zero = {0.0, 0.0}; int_t ldaspa; /* LDA of SPA */ int_t mem_use = 0, iword, zword; #if ( PRNTlevel>=1 ) int_t nLblocks = 0, nUblocks = 0; #endif /* Initialization. */ iam = grid->iam; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); for (i = 0; i < NBUFFERS; ++i) mybufmax[i] = 0; nsupers = supno[n-1] + 1; Astore = A->Store; a = Astore->nzval; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; #if ( PRNTlevel>=1 ) iword = sizeof(int_t); zword = sizeof(doublecomplex); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter zdistribute()"); #endif if ( fact == SamePattern_SameRowPerm ) { /* We can propagate the new values of A into the existing L and U data structures. */ ilsum = Llu->ilsum; ldaspa = Llu->ldalsum; if ( !(dense = doublecomplexCalloc_dist(ldaspa * sp_ienv_dist(3))) ) ABORT("Calloc fails for SPA dense[]."); nrbu = CEILING( nsupers, grid->nprow ); /* Number of local block rows */ if ( !(Urb_length = intCalloc_dist(nrbu)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(nrbu)) ) ABORT("Malloc fails for Urb_indptr[]."); for (lb = 0; lb < nrbu; ++lb) Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. 
*/ Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; Unzval_br_ptr = Llu->Unzval_br_ptr; #if ( PRNTlevel>=1 ) mem_use += 2*nrbu*iword + ldaspa*sp_ienv_dist(3)*zword; #endif for (jb = 0; jb < nsupers; ++jb) { /* Loop through each block column */ pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Scatter A into SPA. */ for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) { for (i = xa_begin[j]; i < xa_end[j]; ++i) { irow = asub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } dense_col += ldaspa; } /* Gather the values of A from SPA into Unzval[]. */ for (lb = 0; lb < nrbu; ++lb) { index = Ufstnz_br_ptr[lb]; if ( index && index[Urb_indptr[lb]] == jb ) { uval = Unzval_br_ptr[lb]; len = Urb_indptr[lb] + UB_DESCRIPTOR; gb = lb * grid->nprow + myrow;/* Global block number */ k = FstBlockC( gb+1 ); irow = ilsum[lb] - FstBlockC( gb ); for (jj = 0, dense_col = dense; jj < nsupc; ++jj) { j = index[len+jj]; for (i = j; i < k; ++i) { uval[Urb_length[lb]++] = dense_col[irow+i]; dense_col[irow+i] = zero; } dense_col += ldaspa; } Urb_indptr[lb] += UB_DESCRIPTOR + nsupc; } } /* for lb ... */ /* Gather the values of A from SPA into Lnzval[]. */ ljb = LBj( jb, grid ); /* Local block number */ index = Lrowind_bc_ptr[ljb]; if ( index ) { nrbl = index[0]; /* Number of row blocks. */ len = index[1]; /* LDA of lusup[]. */ lusup = Lnzval_bc_ptr[ljb]; next_lind = BC_HEADER; next_lval = 0; for (jj = 0; jj < nrbl; ++jj) { gb = index[next_lind++]; len1 = index[next_lind++]; /* Rows in the block. */ lb = LBi( gb, grid ); for (bnnz = 0; bnnz < len1; ++bnnz) { irow = index[next_lind++]; /* Global index. 
*/ irow = ilsum[lb] + irow - FstBlockC( gb ); k = next_lval++; for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } /* for bnnz ... */ } /* for jj ... */ } /* if index ... */ } /* if mycol == pc */ } /* for jb ... */ SUPERLU_FREE(dense); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); } else { /* No L and U data structures are available yet. We need to set up the L and U data structures and propagate the values of A into them. */ lsub = Glu_freeable->lsub; /* compressed L subscripts */ xlsub = Glu_freeable->xlsub; usub = Glu_freeable->usub; /* compressed U subscripts */ xusub = Glu_freeable->xusub; if ( !(ToRecv = intCalloc_dist(nsupers)) ) ABORT("Calloc fails for ToRecv[]."); k = CEILING( nsupers, grid->npcol );/* Number of local column blocks */ if ( !(ToSendR = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for ToSendR[]."); j = k * grid->npcol; if ( !(index = intMalloc_dist(j)) ) ABORT("Malloc fails for index[]."); #if ( PRNTlevel>=1 ) mem_use = k*sizeof(int_t*) + (j + nsupers)*iword; #endif for (i = 0; i < j; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->npcol) ToSendR[i] = &index[j]; k = CEILING( nsupers, grid->nprow ); /* Number of local block rows */ /* Pointers to the beginning of each block row of U. */ if ( !(Unzval_br_ptr = (doublecomplex**)SUPERLU_MALLOC(k * sizeof(doublecomplex*))) ) ABORT("Malloc fails for Unzval_br_ptr[]."); if ( !(Ufstnz_br_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Ufstnz_br_ptr[]."); if ( !(ToSendD = intCalloc_dist(k)) ) ABORT("Malloc fails for ToSendD[]."); if ( !(ilsum = intMalloc_dist(k+1)) ) ABORT("Malloc fails for ilsum[]."); /* Auxiliary arrays used to set up U block data structures. They are freed on return. 
*/ if ( !(rb_marker = intCalloc_dist(k)) ) ABORT("Calloc fails for rb_marker[]."); if ( !(Urb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_length[]."); if ( !(Urb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Urb_indptr[]."); if ( !(Urb_fstnz = intCalloc_dist(k)) ) ABORT("Calloc fails for Urb_fstnz[]."); if ( !(Ucbs = intCalloc_dist(k)) ) ABORT("Calloc fails for Ucbs[]."); #if ( PRNTlevel>=1 ) mem_use = 2*k*sizeof(int_t*) + (7*k+1)*iword; #endif /* Compute ldaspa and ilsum[]. */ ldaspa = 0; ilsum[0] = 0; for (gb = 0; gb < nsupers; ++gb) { if ( myrow == PROW( gb, grid ) ) { i = SuperSize( gb ); ldaspa += i; lb = LBi( gb, grid ); ilsum[lb + 1] = ilsum[lb] + i; } } /* ------------------------------------------------------------ COUNT NUMBER OF ROW BLOCKS AND THE LENGTH OF EACH BLOCK IN U. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF G(U). ------------------------------------------------------------*/ /* Loop through each supernode column. */ for (jb = 0; jb < nsupers; ++jb) { pc = PCOL( jb, grid ); fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); /* Loop through each column in the block. */ for (j = fsupc; j < fsupc + nsupc; ++j) { /* usub[*] contains only "first nonzero" in each segment. */ for (i = xusub[j]; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero of the segment. */ gb = BlockNum( irow ); kcol = PCOL( gb, grid ); ljb = LBj( gb, grid ); if ( mycol == kcol && mycol != pc ) ToSendR[ljb][pc] = YES; pr = PROW( gb, grid ); lb = LBi( gb, grid ); if ( mycol == pc ) { if ( myrow == pr ) { ToSendD[lb] = YES; /* Count nonzeros in entire block row. */ Urb_length[lb] += FstBlockC( gb+1 ) - irow; if (rb_marker[lb] <= jb) {/* First see the block */ rb_marker[lb] = jb + 1; Urb_fstnz[lb] += nsupc; ++Ucbs[lb]; /* Number of column blocks in block row lb. */ #if ( PRNTlevel>=1 ) ++nUblocks; #endif } ToRecv[gb] = 1; } else ToRecv[gb] = 2; /* Do I need 0, 1, 2 ? */ } } /* for i ... */ } /* for j ... */ } /* for jb ... 
*/ /* Set up the initial pointers for each block row in U. */ nrbu = CEILING( nsupers, grid->nprow );/* Number of local block rows */ for (lb = 0; lb < nrbu; ++lb) { len = Urb_length[lb]; rb_marker[lb] = 0; /* Reset block marker. */ if ( len ) { /* Add room for descriptors */ len1 = Urb_fstnz[lb] + BR_HEADER + Ucbs[lb] * UB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1+1)) ) ABORT("Malloc fails for Uindex[]."); Ufstnz_br_ptr[lb] = index; /* XSL 4-23-01 */ if ( !(Unzval_br_ptr[lb] = doublecomplexCalloc_dist(len)) ) ABORT("Calloc fails for Unzval_br_ptr[*][]."); mybufmax[2] = SUPERLU_MAX( mybufmax[2], len1 ); mybufmax[3] = SUPERLU_MAX( mybufmax[3], len ); index[0] = Ucbs[lb]; /* Number of column blocks */ index[1] = len; /* Total length of nzval[] */ index[2] = len1; /* Total length of index[] */ index[len1] = -1; /* End marker */ } else { Ufstnz_br_ptr[lb] = NULL; Unzval_br_ptr[lb] = NULL; } Urb_length[lb] = 0; /* Reset block length. */ Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */ } /* for lb ... */ SUPERLU_FREE(Urb_fstnz); SUPERLU_FREE(Ucbs); #if ( PRNTlevel>=1 ) mem_use -= 2*k * iword; #endif /* Auxiliary arrays used to set up L block data structures. They are freed on return. k is the number of local row blocks. */ if ( !(Lrb_length = intCalloc_dist(k)) ) ABORT("Calloc fails for Lrb_length[]."); if ( !(Lrb_number = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_number[]."); if ( !(Lrb_indptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_indptr[]."); if ( !(Lrb_valptr = intMalloc_dist(k)) ) ABORT("Malloc fails for Lrb_valptr[]."); if ( !(dense = doublecomplexCalloc_dist(ldaspa * sp_ienv_dist(3))) ) ABORT("Calloc fails for SPA dense[]."); /* These counts will be used for triangular solves. 
*/ if ( !(fmod = intCalloc_dist(k)) ) ABORT("Calloc fails for fmod[]."); if ( !(bmod = intCalloc_dist(k)) ) ABORT("Calloc fails for bmod[]."); #if ( PRNTlevel>=1 ) mem_use += 6*k*iword + ldaspa*sp_ienv_dist(3)*zword; #endif k = CEILING( nsupers, grid->npcol );/* Number of local block columns */ /* Pointers to the beginning of each block column of L. */ if ( !(Lnzval_bc_ptr = (doublecomplex**)SUPERLU_MALLOC(k * sizeof(doublecomplex*))) ) ABORT("Malloc fails for Lnzval_bc_ptr[]."); if ( !(Lrowind_bc_ptr = (int_t**)SUPERLU_MALLOC(k * sizeof(int_t*))) ) ABORT("Malloc fails for Lrowind_bc_ptr[]."); Lrowind_bc_ptr[k-1] = NULL; /* These lists of processes will be used for triangular solves. */ if ( !(fsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for fsendx_plist[]."); len = k * grid->nprow; if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for fsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) fsendx_plist[i] = &index[j]; if ( !(bsendx_plist = (int_t **) SUPERLU_MALLOC(k*sizeof(int_t*))) ) ABORT("Malloc fails for bsendx_plist[]."); if ( !(index = intMalloc_dist(len)) ) ABORT("Malloc fails for bsendx_plist[0]"); for (i = 0; i < len; ++i) index[i] = EMPTY; for (i = 0, j = 0; i < k; ++i, j += grid->nprow) bsendx_plist[i] = &index[j]; #if ( PRNTlevel>=1 ) mem_use += 4*k*sizeof(int_t*) + 2*len*iword; #endif /*------------------------------------------------------------ PROPAGATE ROW SUBSCRIPTS AND VALUES OF A INTO L AND U BLOCKS. THIS ACCOUNTS FOR ONE-PASS PROCESSING OF A, L AND U. ------------------------------------------------------------*/ for (jb = 0; jb < nsupers; ++jb) { pc = PCOL( jb, grid ); if ( mycol == pc ) { /* Block column jb in my process column */ fsupc = FstBlockC( jb ); nsupc = SuperSize( jb ); ljb = LBj( jb, grid ); /* Local block number */ /* Scatter A into SPA. 
*/ for (j = fsupc, dense_col = dense; j < FstBlockC( jb+1 ); ++j){ for (i = xa_begin[j]; i < xa_end[j]; ++i) { irow = asub[i]; if ( irow < fsupc ) continue; /* Skip U. XSL 4-23-01 */ gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); irow = ilsum[lb] + irow - FstBlockC( gb ); dense_col[irow] = a[i]; } } dense_col += ldaspa; } jbrow = PROW( jb, grid ); /*------------------------------------------------ * SET UP U BLOCKS. *------------------------------------------------*/ kseen = 0; /* Loop through each column in the block column. */ for (j = fsupc; j < FstBlockC( jb+1 ); ++j) { istart = xusub[j]; for (i = istart; i < xusub[j+1]; ++i) { irow = usub[i]; /* First nonzero in the segment. */ gb = BlockNum( irow ); pr = PROW( gb, grid ); if ( pr != jbrow ) bsendx_plist[ljb][pr] = YES; if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ index = Ufstnz_br_ptr[lb]; if (rb_marker[lb] <= jb) {/* First see the block */ rb_marker[lb] = jb + 1; index[Urb_indptr[lb]] = jb; /* Descriptor */ /* Initialize block length to 0. XSL 4-23-01 */ index[Urb_indptr[lb]+1] = 0; Urb_indptr[lb] += UB_DESCRIPTOR; len = Urb_indptr[lb]; for (k = 0; k < nsupc; ++k) index[len+k] = FstBlockC( gb+1 ); if ( gb != jb )/* Exclude diagonal block. */ ++bmod[lb];/* Mod. count for back solve */ if ( kseen == 0 && myrow != jbrow ) { ++nbrecvx; kseen = 1; } } else { len = Urb_indptr[lb];/* Start fstnz in index */ } jj = j - fsupc; index[len+jj] = irow; } /* if myrow == pr ... */ } /* for i ... */ } /* for j ... */ #if 1 /* XSL 4-23-01 */ for (j = fsupc; j < FstBlockC( jb+1 ); ++j) { /* Gather the initial values of A directly into Uval. (No SPA is involved.) 
*/ for (i = xa_begin[j]; i < xa_end[j]; ++i) { irow = asub[i]; if ( irow >= fsupc ) continue; /* Skip L */ gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; len = Urb_indptr[lb]; jj = index[len]; /* First nonzero in segment */ uval[Urb_length[lb] + irow - jj] = a[i]; } } /* Now increment the index pointer for each row block */ for (lb = 0; lb < nrbu; ++lb) { if ( rb_marker[lb] == jb+1 ) { /* Not an empty block */ gb = lb*grid->nprow + myrow; /* Global block # */ index = Ufstnz_br_ptr[lb]; jj = index[Urb_indptr[lb]]; k = FstBlockC( gb+1 ) - jj; Urb_length[lb] += k; /* Increment the block length */ index[Urb_indptr[lb]+fsupc-j-1] += k; Urb_indptr[lb] += 1; } } } /* for j = fsupc ... */ #else /* Figure out how many nonzeros in each block, and gather the initial values of A from SPA into Uval. */ for (lb = 0; lb < nrbu; ++lb) { if ( rb_marker[lb] == jb + 1 ) { /* Not an empty block. */ index = Ufstnz_br_ptr[lb]; uval = Unzval_br_ptr[lb]; len = Urb_indptr[lb]; gb = lb * grid->nprow + myrow;/* Global block number */ k = FstBlockC( gb+1 ); irow = ilsum[lb] - FstBlockC( gb ); for (jj=0, bnnz=0, dense_col=dense; jj < nsupc; ++jj) { j = index[len+jj]; /* First nonzero in segment. */ bnnz += k - j; for (i = j; i < k; ++i) { uval[Urb_length[lb]++] = dense_col[irow + i]; dense_col[irow + i] = zero; } dense_col += ldaspa; } index[len-1] = bnnz; /* Set block length in Descriptor */ Urb_indptr[lb] += nsupc; } } /* for lb ... */ #endif /*------------------------------------------------ * SET UP L BLOCKS. *------------------------------------------------*/ /* Count number of blocks and length of each block. */ nrbl = 0; len = 0; /* Number of row subscripts I own. 
*/ kseen = 0; istart = xlsub[fsupc]; for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); /* Global block number */ pr = PROW( gb, grid ); /* Process row owning this block */ if ( pr != jbrow ) fsendx_plist[ljb][pr] = YES; if ( myrow == pr ) { lb = LBi( gb, grid ); /* Local block number */ if (rb_marker[lb] <= jb) { /* First see this block */ rb_marker[lb] = jb + 1; Lrb_length[lb] = 1; Lrb_number[nrbl++] = gb; if ( gb != jb ) /* Exclude diagonal block. */ ++fmod[lb]; /* Mod. count for forward solve */ if ( kseen == 0 && myrow != jbrow ) { ++nfrecvx; kseen = 1; } #if ( PRNTlevel>=1 ) ++nLblocks; #endif } else { ++Lrb_length[lb]; } ++len; } } /* for i ... */ if ( nrbl ) { /* Do not ensure the blocks are sorted! */ /* Set up the initial pointers for each block in index[] and nzval[]. */ /* Add room for descriptors */ len1 = len + BC_HEADER + nrbl * LB_DESCRIPTOR; if ( !(index = intMalloc_dist(len1)) ) ABORT("Malloc fails for index[]"); Lrowind_bc_ptr[ljb] = index; if ( !(Lnzval_bc_ptr[ljb] = doublecomplexMalloc_dist(len*nsupc)) ) { fprintf(stderr, "col block %d ", jb); ABORT("Malloc fails for Lnzval_bc_ptr[*][]"); } mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 ); mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc ); mybufmax[4] = SUPERLU_MAX( mybufmax[4], len ); index[0] = nrbl; /* Number of row blocks */ index[1] = len; /* LDA of the nzval[] */ next_lind = BC_HEADER; next_lval = 0; for (k = 0; k < nrbl; ++k) { gb = Lrb_number[k]; lb = LBi( gb, grid ); len = Lrb_length[lb]; Lrb_length[lb] = 0; /* Reset vector of block length */ index[next_lind++] = gb; /* Descriptor */ index[next_lind++] = len; Lrb_indptr[lb] = next_lind; Lrb_valptr[lb] = next_lval; next_lind += len; next_lval += len; } /* Propagate the compressed row subscripts to Lindex[], and the initial values of A from SPA into Lnzval[]. 
*/ lusup = Lnzval_bc_ptr[ljb]; len = index[1]; /* LDA of lusup[] */ for (i = istart; i < xlsub[fsupc+1]; ++i) { irow = lsub[i]; gb = BlockNum( irow ); if ( myrow == PROW( gb, grid ) ) { lb = LBi( gb, grid ); k = Lrb_indptr[lb]++; /* Random access a block */ index[k] = irow; k = Lrb_valptr[lb]++; irow = ilsum[lb] + irow - FstBlockC( gb ); for (j = 0, dense_col = dense; j < nsupc; ++j) { lusup[k] = dense_col[irow]; dense_col[irow] = zero; k += len; dense_col += ldaspa; } } } /* for i ... */ } else { Lrowind_bc_ptr[ljb] = NULL; Lnzval_bc_ptr[ljb] = NULL; } /* if nrbl ... */ } /* if mycol == pc */ } /* for jb ... */ Llu->Lrowind_bc_ptr = Lrowind_bc_ptr; Llu->Lnzval_bc_ptr = Lnzval_bc_ptr; Llu->Ufstnz_br_ptr = Ufstnz_br_ptr; Llu->Unzval_br_ptr = Unzval_br_ptr; Llu->ToRecv = ToRecv; Llu->ToSendD = ToSendD; Llu->ToSendR = ToSendR; Llu->fmod = fmod; Llu->fsendx_plist = fsendx_plist; Llu->nfrecvx = nfrecvx; Llu->bmod = bmod; Llu->bsendx_plist = bsendx_plist; Llu->nbrecvx = nbrecvx; Llu->ilsum = ilsum; Llu->ldalsum = ldaspa; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. # L blocks %d\t# U blocks %d\n", nLblocks, nUblocks); #endif SUPERLU_FREE(rb_marker); SUPERLU_FREE(Urb_length); SUPERLU_FREE(Urb_indptr); SUPERLU_FREE(Lrb_length); SUPERLU_FREE(Lrb_number); SUPERLU_FREE(Lrb_indptr); SUPERLU_FREE(Lrb_valptr); SUPERLU_FREE(dense); /* Find the maximum buffer size. */ MPI_Allreduce(mybufmax, Llu->bufmax, NBUFFERS, mpi_int_t, MPI_MAX, grid->comm); } /* if fact == SamePattern_SameRowPerm */ #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: ilsum, fmod, fsendx_plist, bmod, bsendx_plist */ CHECK_MALLOC(iam, "Exit zdistribute()"); #endif return (mem_use); } /* ZDISTRIBUTE */ SuperLU_DIST_5.3.0/SRC/pdgssvx_ABglobal.c0000644013363400111340000012172013233431301016653 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. 
of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of linear equations A*X=B, * *
 * -- Distributed SuperLU routine (version 4.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Last modified:
 * December 31, 2015   version 4.3
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * pdgssvx_ABglobal solves a system of linear equations A*X=B,
 * by using Gaussian elimination with "static pivoting" to
 * compute the LU factorization of A.
 *
 * Static pivoting is a technique that combines the numerical stability
 * of partial pivoting with the scalability of Cholesky (no pivoting),
 * to run accurately and efficiently on large numbers of processors.
 *
 * See our paper at http://www.nersc.gov/~xiaoye/SuperLU/ for a detailed
 * description of the parallel algorithms.
 *
 * Here are the options for using this code:
 *
 *   1. Independent of all the other options specified below, the
 *      user must supply
 *
 *      -  B, the matrix of right hand sides, and its dimensions ldb and nrhs
 *      -  grid, a structure describing the 2D processor mesh
 *      -  options->IterRefine, which determines whether or not to
 *            improve the accuracy of the computed solution using 
 *            iterative refinement
 *
 *      On output, B is overwritten with the solution X.
 *
 *   2. Depending on options->Fact, the user has several options
 *      for solving A*X=B. The standard option is for factoring
 *      A "from scratch". (The other options, described below,
 *      are used when A is sufficiently similar to a previously 
 *      solved problem to save time by reusing part or all of 
 *      the previous factorization.)
 *
 *      -  options->Fact = DOFACT: A is factored "from scratch"
 *
 *      In this case the user must also supply
 *
 *      -  A, the input matrix
 *
 *      as well as the following options, which are described in more 
 *      detail below:
 *
 *      -  options->Equil,   to specify how to scale the rows and columns
 *                           of A to "equilibrate" it (to try to reduce its
 *                           condition number and so improve the
 *                           accuracy of the computed solution)
 *
 *      -  options->RowPerm, to specify how to permute the rows of A
 *                           (typically to control numerical stability)
 *
 *      -  options->ColPerm, to specify how to permute the columns of A
 *                           (typically to control fill-in and enhance
 *                           parallelism during factorization)
 *
 *      -  options->ReplaceTinyPivot, to specify how to deal with tiny
 *                           pivots encountered during factorization
 *                           (to control numerical stability)
 *
 *      The outputs returned include
 *         
 *      -  ScalePermstruct,  modified to describe how the input matrix A
 *                           was equilibrated and permuted:
 *         -  ScalePermstruct->DiagScale, indicates whether the rows and/or
 *                                        columns of A were scaled
 *         -  ScalePermstruct->R, array of row scale factors
 *         -  ScalePermstruct->C, array of column scale factors
 *         -  ScalePermstruct->perm_r, row permutation vector
 *         -  ScalePermstruct->perm_c, column permutation vector
 *
 *            (part of ScalePermstruct may also need to be supplied on input,
 *             depending on options->RowPerm and options->ColPerm as described 
 *             later).
 *
 *      -  A, the input matrix A overwritten by the scaled and permuted matrix
 *                Pc*Pr*diag(R)*A*diag(C)
 *             where 
 *                Pr and Pc are row and column permutation matrices determined
 *                  by ScalePermstruct->perm_r and ScalePermstruct->perm_c, 
 *                  respectively, and 
 *                diag(R) and diag(C) are diagonal scaling matrices determined
 *                  by ScalePermstruct->DiagScale, ScalePermstruct->R and 
 *                  ScalePermstruct->C
 *
 *      -  LUstruct, which contains the L and U factorization of A1 where
 *
 *                A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 *              (Note that A1 = Aout * Pc^T, where Aout is the matrix stored
 *               in A on output.)
 *
 *   3. The second value of options->Fact assumes that a matrix with the same
 *      sparsity pattern as A has already been factored:
 *     
 *      -  options->Fact = SamePattern: A is factored, assuming that it has
 *            the same nonzero pattern as a previously factored matrix. In this
 *            case the algorithm saves time by reusing the previously computed
 *            column permutation vector stored in ScalePermstruct->perm_c
 *            and the "elimination tree" of A stored in LUstruct->etree.
 *
 *      In this case the user must still specify the following options
 *      as before:
 *
 *      -  options->Equil
 *      -  options->RowPerm
 *      -  options->ReplaceTinyPivot
 *
 *      but not options->ColPerm, whose value is ignored. This is because the
 *      previous column permutation from ScalePermstruct->perm_c is used as
 *      input. The user must also supply 
 *
 *      -  A, the input matrix
 *      -  ScalePermstruct->perm_c, the column permutation
 *      -  LUstruct->etree, the elimination tree
 *
 *      The outputs returned include
 *         
 *      -  A, the input matrix A overwritten by the scaled and permuted matrix
 *            as described above
 *      -  ScalePermstruct,  modified to describe how the input matrix A was
 *                           equilibrated and row permuted
 *      -  LUstruct, modified to contain the new L and U factors
 *
 *   4. The third value of options->Fact assumes that a matrix B with the same
 *      sparsity pattern as A has already been factored, and where the
 *      row permutation of B can be reused for A. This is useful when A and B
 *      have similar numerical values, so that the same row permutation
 *      will make both factorizations numerically stable. This lets us reuse
 *      all of the previously computed structure of L and U.
 *
 *      -  options->Fact = SamePattern_SameRowPerm: A is factored,
 *            assuming not only the same nonzero pattern as the previously
 *            factored matrix B, but reusing B's row permutation.
 *
 *      In this case the user must still specify the following options
 *      as before:
 *
 *      -  options->Equil
 *      -  options->ReplaceTinyPivot
 *
 *      but not options->RowPerm or options->ColPerm, whose values are ignored.
 *      This is because the permutations from ScalePermstruct->perm_r and
 *      ScalePermstruct->perm_c are used as input.
 *
 *      The user must also supply 
 *
 *      -  A, the input matrix
 *      -  ScalePermstruct->DiagScale, how the previous matrix was row and/or
 *                                     column scaled
 *      -  ScalePermstruct->R, the row scalings of the previous matrix, if any
 *      -  ScalePermstruct->C, the column scalings of the previous matrix,
 *                             if any
 *      -  ScalePermstruct->perm_r, the row permutation of the previous matrix
 *      -  ScalePermstruct->perm_c, the column permutation of the previous 
 *                                  matrix
 *      -  all of LUstruct, the previously computed information about L and U
 *                (the actual numerical values of L and U stored in
 *                 LUstruct->Llu are ignored)
 *
 *      The outputs returned include
 *         
 *      -  A, the input matrix A overwritten by the scaled and permuted matrix
 *            as described above
 *      -  ScalePermstruct,  modified to describe how the input matrix A was
 *                           equilibrated 
 *                  (thus ScalePermstruct->DiagScale, R and C may be modified)
 *      -  LUstruct, modified to contain the new L and U factors
 *
 *   5. The fourth and last value of options->Fact assumes that A is
 *      identical to a matrix that has already been factored on a previous 
 *      call, and reuses its entire LU factorization
 *
 *      -  options->Fact = FACTORED: A is identical to a previously
 *            factorized matrix, so the entire previous factorization
 *            can be reused.
 *
 *      In this case all the other options mentioned above are ignored
 *      (options->Equil, options->RowPerm, options->ColPerm, 
 *       options->ReplaceTinyPivot)
 *
 *      The user must also supply 
 *
 *      -  A, the unfactored matrix, only in the case that iterative refinement
 *            is to be done (specifically A must be the output A from 
 *            the previous call, so that it has been scaled and permuted)
 *      -  all of ScalePermstruct
 *      -  all of LUstruct, including the actual numerical values of L and U
 *
 *      all of which are unmodified on output.
 *         
 * Arguments
 * =========
 *
 * options (input) superlu_dist_options_t*
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *         The following fields should be defined for this structure:
 *         
 *         o Fact (fact_t)
 *           Specifies whether or not the factored form of the matrix
 *           A is supplied on entry, and if not, how the matrix A should
 *           be factorized based on the previous history.
 *
 *           = DOFACT: The matrix A will be factorized from scratch.
 *                 Inputs:  A
 *                          options->Equil, RowPerm, ColPerm, ReplaceTinyPivot
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          all of ScalePermstruct
 *                          all of LUstruct
 *
 *           = SamePattern: the matrix A will be factorized assuming
 *             that a factorization of a matrix with the same sparsity
 *             pattern was performed prior to this one. Therefore, this
 *             factorization will reuse column permutation vector 
 *             ScalePermstruct->perm_c and the elimination tree
 *             LUstruct->etree
 *                 Inputs:  A
 *                          options->Equil, RowPerm, ReplaceTinyPivot
 *                          ScalePermstruct->perm_c
 *                          LUstruct->etree
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          rest of ScalePermstruct (DiagScale, R, C, perm_r)
 *                          rest of LUstruct (GLU_persist, Llu)
 *
 *           = SamePattern_SameRowPerm: the matrix A will be factorized
 *             assuming that a factorization of a matrix with the same
 *             sparsity pattern and similar numerical values was performed
 *             prior to this one. Therefore, this factorization will reuse
 *             both row and column scaling factors R and C, both row and
 *             column permutation vectors perm_r and perm_c, and the
 *             distributed data structure set up from the previous symbolic
 *             factorization.
 *                 Inputs:  A
 *                          options->Equil, ReplaceTinyPivot
 *                          all of ScalePermstruct
 *                          all of LUstruct
 *                 Outputs: modified A
 *                             (possibly row and/or column scaled and/or 
 *                              permuted)
 *                          modified LUstruct->Llu
 *           = FACTORED: the matrix A is already factored.
 *                 Inputs:  all of ScalePermstruct
 *                          all of LUstruct
 *
 *         o Equil (yes_no_t)
 *           Specifies whether to equilibrate the system.
 *           = NO:  no equilibration.
 *           = YES: scaling factors are computed to equilibrate the system:
 *                      diag(R)*A*diag(C)*inv(diag(C))*X = diag(R)*B.
 *                  Whether or not the system will be equilibrated depends
 *                  on the scaling of the matrix A, but if equilibration is
 *                  used, A is overwritten by diag(R)*A*diag(C) and B by
 *                  diag(R)*B.
 *
 *         o RowPerm (rowperm_t)
 *           Specifies how to permute rows of the matrix A.
 *           = NATURAL:   use the natural ordering.
 *           = LargeDiag: use the Duff/Koster algorithm to permute rows of
 *                        the original matrix to make the diagonal large
 *                        relative to the off-diagonal.
 *           = MY_PERMR:  use the ordering given in ScalePermstruct->perm_r
 *                        input by the user.
 *           
 *         o ColPerm (colperm_t)
 *           Specifies what type of column permutation to use to reduce fill.
 *           = NATURAL:       natural ordering.
 *           = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A.
 *           = MMD_ATA:       minimum degree ordering on structure of A'*A.
 *           = MY_PERMC:      the ordering given in ScalePermstruct->perm_c.
 *         
 *         o ReplaceTinyPivot (yes_no_t)
 *           = NO:  do not modify pivots
 *           = YES: replace tiny pivots by sqrt(epsilon)*norm(A) during 
 *                  LU factorization.
 *
 *         o IterRefine (IterRefine_t)
 *           Specifies how to perform iterative refinement.
 *           = NO:     no iterative refinement.
 *           = SLU_DOUBLE: accumulate residual in double precision.
 *           = SLU_EXTRA:  accumulate residual in extra precision.
 *
 *         NOTE: all options must be identical on all processes when
 *               calling this routine.
 *
 * A (input/output) SuperMatrix*
 *         On entry, matrix A in A*X=B, of dimension (A->nrow, A->ncol).
 *         The number of linear equations is A->nrow. The type of A must be:
 *         Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. That is, A is stored in
 *         compressed column format (also known as Harwell-Boeing format).
 *         See supermatrix.h for the definition of 'SuperMatrix'.
 *         This routine only handles square A, however, the LU factorization
 *         routine pdgstrf can factorize rectangular matrices.
 *         On exit, A may be overwritten by Pc*Pr*diag(R)*A*diag(C),
 *         depending on ScalePermstruct->DiagScale, options->RowPerm and
 *         options->ColPerm:
 *             if ScalePermstruct->DiagScale != NOEQUIL, A is overwritten by
 *                diag(R)*A*diag(C).
 *             if options->RowPerm != NATURAL, A is further overwritten by
 *                Pr*diag(R)*A*diag(C).
 *             if options->ColPerm != NATURAL, A is further overwritten by
 *                Pc*Pr*diag(R)*A*diag(C).
 *         If all the above conditions are true, the LU decomposition is
 *         performed on the matrix Pc*Pr*diag(R)*A*diag(C)*Pc^T.
 *
 *         NOTE: Currently, A must reside in all processes when calling
 *               this routine.
 *
 * ScalePermstruct (input/output) ScalePermstruct_t*
 *         The data structure to store the scaling and permutation vectors
 *         describing the transformations performed to the matrix A.
 *         It contains the following fields:
 *
 *         o DiagScale (DiagScale_t)
 *           Specifies the form of equilibration that was done.
 *           = NOEQUIL: no equilibration.
 *           = ROW:     row equilibration, i.e., A was premultiplied by
 *                      diag(R).
 *           = COL:     Column equilibration, i.e., A was postmultiplied
 *                      by diag(C).
 *           = BOTH:    both row and column equilibration, i.e., A was 
 *                      replaced by diag(R)*A*diag(C).
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm,
 *           DiagScale is an input argument; otherwise it is an output
 *           argument.
 *
 *         o perm_r (int*)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *           If options->RowPerm = MY_PERMR, or
 *           options->Fact = SamePattern_SameRowPerm, perm_r is an
 *           input argument; otherwise it is an output argument.
 *
 *         o perm_c (int*)
 *           Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *           If options->ColPerm = MY_PERMC or options->Fact = SamePattern
 *           or options->Fact = SamePattern_SameRowPerm, perm_c is an
 *           input argument; otherwise, it is an output argument.
 *           On exit, perm_c may be overwritten by the product of the input
 *           perm_c and a permutation that postorders the elimination tree
 *           of Pc*A'*A*Pc'; perm_c is not changed if the elimination tree
 *           is already in postorder.
 *
 *         o R (double*) dimension (A->nrow)
 *           The row scale factors for A.
 *           If DiagScale = ROW or BOTH, A is multiplied on the left by 
 *                          diag(R).
 *           If DiagScale = NOEQUIL or COL, R is not defined.
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm, R is
 *           an input argument; otherwise, R is an output argument.
 *
 *         o C (double*) dimension (A->ncol)
 *           The column scale factors for A.
 *           If DiagScale = COL or BOTH, A is multiplied on the right by 
 *                          diag(C).
 *           If DiagScale = NOEQUIL or ROW, C is not defined.
 *           If options->Fact = FACTORED or SamePattern_SameRowPerm, C is
 *           an input argument; otherwise, C is an output argument.
 *         
 * B       (input/output) double*
 *         On entry, the right-hand side matrix of dimension (A->nrow, nrhs).
 *         On exit, the solution matrix if info = 0.
 *
 *         NOTE: Currently, B must reside in all processes when calling
 *               this routine.
 *
 * ldb     (input) int (global)
 *         The leading dimension of matrix B.
 *
 * nrhs    (input) int (global)
 *         The number of right-hand sides.
 *         If nrhs = 0, only LU decomposition is performed, the forward
 *         and back substitutions are skipped.
 *
 * grid    (input) gridinfo_t*
 *         The 2D process mesh. It contains the MPI communicator, the number
 *         of process rows (NPROW), the number of process columns (NPCOL),
 *         and my process rank. It is an input argument to all the
 *         parallel routines.
 *         Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *         See superlu_ddefs.h for the definition of 'gridinfo_t'.
 *
 * LUstruct (input/output) LUstruct_t*
 *         The data structures to store the distributed L and U factors.
 *         It contains the following fields:
 *
 *         o etree (int*) dimension (A->ncol)
 *           Elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc', dimension A->ncol.
 *           It is computed in sp_colorder() during the first factorization,
 *           and is reused in the subsequent factorizations of the matrices
 *           with the same nonzero pattern.
 *           On exit of sp_colorder(), the columns of A are permuted so that
 *           the etree is in a certain postorder. This postorder is reflected
 *           in ScalePermstruct->perm_c.
 *           NOTE:
 *           Etree is a vector of parent pointers for a forest whose vertices
 *           are the integers 0 to A->ncol-1; etree[root]==A->ncol.
 *
 *         o Glu_persist (Glu_persist_t*)
 *           Global data structure (xsup, supno) replicated on all processes,
 *           describing the supernode partition in the factored matrices
 *           L and U:
 *	       xsup[s] is the leading column of the s-th supernode,
 *             supno[i] is the supernode number to which column i belongs.
 *
 *         o Llu (LocalLU_t*)
 *           The distributed data structures to store L and U factors.
 *           See superlu_ddefs.h for the definition of 'LocalLU_t'.
 *
 * berr    (output) double*, dimension (nrhs)
 *         The componentwise relative backward error of each solution   
 *         vector X(j) (i.e., the smallest relative change in   
 *         any element of A or B that makes X(j) an exact solution).
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics on runtime and floating-point operation count.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info    (output) int*
 *         = 0: successful exit
 *         < 0: if info = -i, the i-th argument had an illegal value
 *         > 0: if info = i, and i is
 *             <= A->ncol: U(i,i) is exactly zero. The factorization has
 *                been completed, but the factor U is exactly singular,
 *                so the solution could not be computed.
 *             > A->ncol: number of bytes allocated when memory allocation
 *                failure occurred, plus A->ncol.
 *
 *
 * See superlu_ddefs.h for the definitions of various data types.
 * 
*/ void pdgssvx_ABglobal(superlu_dist_options_t *options, SuperMatrix *A, ScalePermstruct_t *ScalePermstruct, double B[], int ldb, int nrhs, gridinfo_t *grid, LUstruct_t *LUstruct, double *berr, SuperLUStat_t *stat, int *info) { SuperMatrix AC; NCformat *Astore; NCPformat *ACstore; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; Glu_freeable_t *Glu_freeable; /* The nonzero structures of L and U factors, which are replicated on all processrs. (lsub, xlsub) contains the compressed subscript of supernodes in L. (usub, xusub) contains the compressed subscript of nonzero segments in U. If options->Fact != SamePattern_SameRowPerm, they are computed by SYMBFACT routine, and then used by DDISTRIBUTE routine. They will be freed after DDISTRIBUTE routine. If options->Fact == SamePattern_SameRowPerm, these structures are not used. */ fact_t Fact; double *a; int_t *perm_r; /* row permutations from partial pivoting */ int_t *perm_c; /* column permutation vector */ int_t *etree; /* elimination tree */ int_t *colptr, *rowind; int_t Equil, factored, job, notran, colequ, rowequ; int_t i, iinfo, j, irow, m, n, nnz, permc_spec, dist_mem_use; int iam; int ldx; /* LDA for matrix X (global). */ char equed[1], norm[1]; double *C, *R, *C1, *R1, amax, anorm, colcnd, rowcnd; double *X, *b_col, *b_work, *x_col; double t; static superlu_dist_mem_usage_t num_mem_usage, symb_mem_usage; #if ( PRNTlevel>= 2 ) double dmin, dsum, dprod; #endif /* Test input parameters. 
*/ *info = 0; Fact = options->Fact; if ( Fact < 0 || Fact > FACTORED ) *info = -1; else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR ) *info = -1; else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC ) *info = -1; else if ( options->IterRefine < 0 || options->IterRefine > SLU_EXTRA ) *info = -1; else if ( options->IterRefine == SLU_EXTRA ) { *info = -1; fprintf(stderr, "Extra precise iterative refinement yet to support."); } else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( ldb < A->nrow ) *info = -5; else if ( nrhs < 0 ) *info = -6; if ( *info ) { i = -(*info); pxerr_dist("pdgssvx_ABglobal", grid, -*info); return; } /* Initialization */ factored = (Fact == FACTORED); Equil = (!factored && options->Equil == YES); notran = (options->Trans == NOTRANS); iam = grid->iam; job = 5; m = A->nrow; n = A->ncol; Astore = A->Store; nnz = Astore->nnz; a = Astore->nzval; colptr = Astore->colptr; rowind = Astore->rowind; if ( factored || (Fact == SamePattern_SameRowPerm && Equil) ) { rowequ = (ScalePermstruct->DiagScale == ROW) || (ScalePermstruct->DiagScale == BOTH); colequ = (ScalePermstruct->DiagScale == COL) || (ScalePermstruct->DiagScale == BOTH); } else rowequ = colequ = FALSE; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgssvx_ABglobal()"); #endif perm_r = ScalePermstruct->perm_r; perm_c = ScalePermstruct->perm_c; etree = LUstruct->etree; R = ScalePermstruct->R; C = ScalePermstruct->C; if ( Equil && Fact != SamePattern_SameRowPerm ) { /* Allocate storage if not done so before. 
*/ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->R = R; ScalePermstruct->C = C; break; case ROW: if ( !(C = (double *) doubleMalloc_dist(n)) ) ABORT("Malloc fails for C[]."); ScalePermstruct->C = C; break; case COL: if ( !(R = (double *) doubleMalloc_dist(m)) ) ABORT("Malloc fails for R[]."); ScalePermstruct->R = R; break; } } /* ------------------------------------------------------------ Diagonal scaling to equilibrate the matrix. ------------------------------------------------------------*/ if ( Equil ) { #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter equil"); #endif t = SuperLU_timer_(); if ( Fact == SamePattern_SameRowPerm ) { /* Reuse R and C. */ switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: break; case ROW: for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; a[i] *= R[irow]; /* Scale rows. */ } } break; case COL: for (j = 0; j < n; ++j) for (i = colptr[j]; i < colptr[j+1]; ++i) a[i] *= C[j]; /* Scale columns. */ break; case BOTH: for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; a[i] *= R[irow] * C[j]; /* Scale rows and columns. */ } } break; } } else { if ( !iam ) { /* Compute row and column scalings to equilibrate matrix A. 
*/ dgsequ_dist(A, R, C, &rowcnd, &colcnd, &amax, &iinfo); MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm ); } else { if ( iinfo > 0 ) { if ( iinfo <= m ) { #if ( PRNTlevel>=1 ) fprintf(stderr, "The " IFMT "-th row of A is exactly zero\n", iinfo); #endif } else { #if ( PRNTlevel>=1 ) fprintf(stderr, "The " IFMT "-th column of A is exactly zero\n", iinfo-n); #endif } } } } else { MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm ); } } if ( iinfo == 0 ) { /* Equilibrate matrix A. */ dlaqgs_dist(A, R, C, rowcnd, colcnd, amax, equed); if ( strncmp(equed, "R", 1)==0 ) { ScalePermstruct->DiagScale = ROW; rowequ = ROW; } else if ( strncmp(equed, "C", 1)==0 ) { ScalePermstruct->DiagScale = COL; colequ = COL; } else if ( strncmp(equed, "B", 1)==0 ) { ScalePermstruct->DiagScale = BOTH; rowequ = ROW; colequ = COL; } else ScalePermstruct->DiagScale = NOEQUIL; } #if ( PRNTlevel>=1 ) if ( !iam ) { printf(".. equilibrated? *equed = %c\n", *equed); /*fflush(stdout);*/ } #endif } /* if Fact ... */ stat->utime[EQUIL] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit equil"); #endif } /* end if Equil ... */ /* ------------------------------------------------------------ Permute rows of A. ------------------------------------------------------------*/ if ( options->RowPerm != NO ) { t = SuperLU_timer_(); if ( Fact == SamePattern_SameRowPerm /* Reuse perm_r. */ || options->RowPerm == MY_PERMR ) { /* Use my perm_r. 
*/ for (i = 0; i < colptr[n]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } else if ( !factored ) { if ( job == 5 ) { /* Allocate storage for scaling factors. */ if ( !(R1 = (double *) SUPERLU_MALLOC(m * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R1[]"); if ( !(C1 = (double *) SUPERLU_MALLOC(n * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C1[]"); } if ( !iam ) { /* Process 0 finds a row permutation for large diagonal. */ iinfo = dldperm_dist(job, m, nnz, colptr, rowind, a, perm_r, R1, C1); MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } } else { MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm ); if ( iinfo == 0 ) { MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm ); if ( job == 5 && Equil ) { MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm ); } } } if ( iinfo && job == 5) { SUPERLU_FREE(R1); SUPERLU_FREE(C1); } #if ( PRNTlevel>=2 ) dmin = dmach_dist("Overflow"); dsum = 0.0; dprod = 1.0; #endif if ( iinfo == 0 ) { if ( job == 5 ) { if ( Equil ) { for (i = 0; i < n; ++i) { R1[i] = exp(R1[i]); C1[i] = exp(C1[i]); } for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; a[i] *= R1[irow] * C1[j]; /* Scale the matrix. */ rowind[i] = perm_r[irow]; #if ( PRNTlevel>=2 ) if ( rowind[i] == j ) /* New diagonal */ dprod *= fabs(a[i]); #endif } } /* Multiply together the scaling factors. */ if ( rowequ ) for (i = 0; i < m; ++i) R[i] *= R1[i]; else for (i = 0; i < m; ++i) R[i] = R1[i]; if ( colequ ) for (i = 0; i < n; ++i) C[i] *= C1[i]; else for (i = 0; i < n; ++i) C[i] = C1[i]; ScalePermstruct->DiagScale = BOTH; rowequ = colequ = 1; } else { /* No equilibration. 
*/ for (i = colptr[0]; i < colptr[n]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; } } SUPERLU_FREE (R1); SUPERLU_FREE (C1); } else { /* job = 2,3,4 */ for (j = 0; j < n; ++j) { for (i = colptr[j]; i < colptr[j+1]; ++i) { irow = rowind[i]; rowind[i] = perm_r[irow]; #if ( PRNTlevel>=2 ) if ( rowind[i] == j ) { /* New diagonal */ if ( job == 2 || job == 3 ) dmin = SUPERLU_MIN(dmin, fabs(a[i])); else if ( job == 4 ) dsum += fabs(a[i]); else if ( job == 5 ) dprod *= fabs(a[i]); } #endif } /* end for i ... */ } /* end for j ... */ } /* end else */ } else { /* if iinfo != 0 */ for (i = 0; i < m; ++i) perm_r[i] = i; } #if ( PRNTlevel>=2 ) if ( job == 2 || job == 3 ) { if ( !iam ) printf("\tsmallest diagonal %e\n", dmin); } else if ( job == 4 ) { if ( !iam ) printf("\tsum of diagonal %e\n", dsum); } else if ( job == 5 ) { if ( !iam ) printf("\t product of diagonal %e\n", dprod); } #endif } /* else !factored */ t = SuperLU_timer_() - t; stat->utime[ROWPERM] = t; #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. LDPERM job " IFMT "\t time: %.2f\n", job, t); #endif } else { /* options->RowPerm == NOROWPERM */ for (i = 0; i < m; ++i) perm_r[i] = i; } if ( !factored || options->IterRefine ) { /* Compute norm(A), which will be used to adjust small diagonal. */ if ( notran ) *(unsigned char *)norm = '1'; else *(unsigned char *)norm = 'I'; anorm = dlangs_dist(norm, A); #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. anorm %e\n", anorm); #endif } /* ------------------------------------------------------------ Perform the LU factorization. 
------------------------------------------------------------*/ if ( !factored ) { t = SuperLU_timer_(); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && Fact == DOFACT ) /* Use an ordering provided by SuperLU */ get_perm_c_dist(iam, permc_spec, A, perm_c); /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc' (a.k.a. column etree), depending on the choice of ColPerm. Adjust perm_c[] to be consistent with a postorder of etree. Permute columns of A to form A*Pc'. */ sp_colorder(options, A, perm_c, etree, &AC); /* Form Pc*A*Pc' to preserve the diagonal of the matrix Pr*A. */ ACstore = AC.Store; for (j = 0; j < n; ++j) for (i = ACstore->colbeg[j]; i < ACstore->colend[j]; ++i) { irow = ACstore->rowind[i]; ACstore->rowind[i] = perm_c[irow]; } stat->utime[COLPERM] = SuperLU_timer_() - t; /* Perform a symbolic factorization on matrix A and set up the nonzero data structures which are suitable for supernodal GENP. */ if ( Fact != SamePattern_SameRowPerm ) { #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. 
symbfact(): relax " IFMT ", maxsuper " IFMT ", fill " IFMT "\n", sp_ienv_dist(2), sp_ienv_dist(3), sp_ienv_dist(6)); #endif t = SuperLU_timer_(); if ( !(Glu_freeable = (Glu_freeable_t *) SUPERLU_MALLOC(sizeof(Glu_freeable_t))) ) ABORT("Malloc fails for Glu_freeable."); iinfo = symbfact(options, iam, &AC, perm_c, etree, Glu_persist, Glu_freeable); stat->utime[SYMBFAC] = SuperLU_timer_() - t; if ( iinfo <= 0 ) { QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage); #if ( PRNTlevel>=1 ) if ( !iam ) { printf("\tNo of supers %ld\n", (long long)Glu_persist->supno[n-1]+1); printf("\tSize of G(L) %ld\n", (long long)Glu_freeable->xlsub[n]); printf("\tSize of G(U) %ld\n", (long long)Glu_freeable->xusub[n]); printf("\tint %d, short %d, float %d, double %d\n", (int) sizeof(int_t), (int) sizeof(short), (int) sizeof(float), (int) sizeof(double)); printf("\tSYMBfact (MB):\tL\\U %.2f\ttotal %.2f\texpansions " IFMT "\n", symb_mem_usage.for_lu*1e-6, symb_mem_usage.total*1e-6, symb_mem_usage.expansions); } #endif } else { /* symbfact out of memory */ #if ( PRNTlevel>=1 ) if ( !iam ) fprintf(stderr, "symbfact() error returns " IFMT "\n", iinfo); #endif *info = iinfo; return; } } /* Distribute the L and U factors onto the process grid. */ t = SuperLU_timer_(); dist_mem_use = ddistribute(Fact, n, &AC, Glu_freeable, LUstruct, grid); stat->utime[DIST] = SuperLU_timer_() - t; /* Deallocate storage used in symbolic factor. */ if ( Fact != SamePattern_SameRowPerm ) { iinfo = symbfact_SubFree(Glu_freeable); SUPERLU_FREE(Glu_freeable); } /* Perform numerical factorization in parallel. 
*/ t = SuperLU_timer_(); pdgstrf(options, m, n, anorm, LUstruct, grid, stat, info); stat->utime[FACT] = SuperLU_timer_() - t; #if ( PRNTlevel>=1 ) { int_t TinyPivots; float for_lu, total, max, avg, temp; dQuerySpace_dist(n, LUstruct, grid, stat, &num_mem_usage); MPI_Reduce( &num_mem_usage.for_lu, &for_lu, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Reduce( &num_mem_usage.total, &total, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); temp = SUPERLU_MAX(symb_mem_usage.total, symb_mem_usage.for_lu + (float)dist_mem_use + num_mem_usage.for_lu); temp = SUPERLU_MAX(temp, num_mem_usage.total); MPI_Reduce( &temp, &max, 1, MPI_FLOAT, MPI_MAX, 0, grid->comm ); MPI_Reduce( &temp, &avg, 1, MPI_FLOAT, MPI_SUM, 0, grid->comm ); MPI_Allreduce( &stat->TinyPivots, &TinyPivots, 1, mpi_int_t, MPI_SUM, grid->comm ); stat->TinyPivots = TinyPivots; if ( !iam ) { printf("\tNUMfact (MB) all PEs:\tL\\U\t%.2f\tall\t%.2f\n", for_lu*1e-6, total*1e-6); printf("\tAll space (MB):" "\t\ttotal\t%.2f\tAvg\t%.2f\tMax\t%.2f\n", avg*1e-6, avg/grid->nprow/grid->npcol*1e-6, max*1e-6); printf("\tNumber of tiny pivots: %10d\n", stat->TinyPivots); printf(".. pdgstrf INFO = %d\n", *info); } } #endif } else if ( options->IterRefine ) { /* options->Fact==FACTORED */ /* Permute columns of A to form A*Pc' using the existing perm_c. * NOTE: rows of A were previously permuted to Pc*A. */ sp_colorder(options, A, perm_c, NULL, &AC); } /* if !factored ... */ /* ------------------------------------------------------------ Compute the solution matrix X. ------------------------------------------------------------*/ if ( nrhs && *info == 0 ) { if ( !(b_work = doubleMalloc_dist(n)) ) ABORT("Malloc fails for b_work[]"); /* ------------------------------------------------------------ Scale the right-hand side if equilibration was performed. 
------------------------------------------------------------*/ if ( notran ) { if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) b_col[i] *= R[i]; b_col += ldb; } } } else if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) b_col[i] *= C[i]; b_col += ldb; } } /* ------------------------------------------------------------ Permute the right-hand side to form Pr*B. ------------------------------------------------------------*/ if ( options->RowPerm != NO ) { if ( notran ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) b_work[perm_r[i]] = b_col[i]; for (i = 0; i < m; ++i) b_col[i] = b_work[i]; b_col += ldb; } } } /* ------------------------------------------------------------ Permute the right-hand side to form Pc*B. ------------------------------------------------------------*/ if ( notran ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < m; ++i) b_work[perm_c[i]] = b_col[i]; for (i = 0; i < m; ++i) b_col[i] = b_work[i]; b_col += ldb; } } /* Save a copy of the right-hand side. */ ldx = ldb; if ( !(X = doubleMalloc_dist(((size_t)ldx) * nrhs)) ) ABORT("Malloc fails for X[]"); x_col = X; b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < ldb; ++i) x_col[i] = b_col[i]; x_col += ldx; b_col += ldb; } /* ------------------------------------------------------------ Solve the linear system. ------------------------------------------------------------*/ pdgstrs_Bglobal(n, LUstruct, grid, X, ldb, nrhs, stat, info); /* ------------------------------------------------------------ Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. ------------------------------------------------------------*/ if ( options->IterRefine ) { /* Improve the solution by iterative refinement. 
*/ t = SuperLU_timer_(); pdgsrfs_ABXglobal(n, &AC, anorm, LUstruct, grid, B, ldb, X, ldx, nrhs, berr, stat, info); stat->utime[REFINE] = SuperLU_timer_() - t; } /* Permute the solution matrix X <= Pc'*X. */ for (j = 0; j < nrhs; j++) { b_col = &B[j*ldb]; x_col = &X[j*ldx]; for (i = 0; i < n; ++i) b_col[i] = x_col[perm_c[i]]; } /* Transform the solution matrix X to a solution of the original system before the equilibration. */ if ( notran ) { if ( colequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < n; ++i) b_col[i] *= C[i]; b_col += ldb; } } } else if ( rowequ ) { b_col = B; for (j = 0; j < nrhs; ++j) { for (i = 0; i < n; ++i) b_col[i] *= R[i]; b_col += ldb; } } SUPERLU_FREE(b_work); SUPERLU_FREE(X); } /* end if nrhs != 0 */ #if ( PRNTlevel>=1 ) if ( !iam ) printf(".. DiagScale = %d\n", ScalePermstruct->DiagScale); #endif /* Deallocate R and/or C if it is not used. */ if ( Equil && Fact != SamePattern_SameRowPerm ) { switch ( ScalePermstruct->DiagScale ) { case NOEQUIL: SUPERLU_FREE(R); SUPERLU_FREE(C); break; case ROW: SUPERLU_FREE(C); break; case COL: SUPERLU_FREE(R); break; } } if ( !factored || (factored && options->IterRefine) ) Destroy_CompCol_Permuted_dist(&AC); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgssvx_ABglobal()"); #endif } SuperLU_DIST_5.3.0/SRC/zbinary_io.c0000644013363400111340000000246113233431301015577 0ustar xiaoyessg#include "superlu_zdefs.h" int zread_binary(FILE *fp, int_t *m, int_t *n, int_t *nnz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { size_t isize = sizeof(int_t), dsize = sizeof(double); int nnz_read; fread(n, isize, 1, fp); fread(nnz, isize, 1, fp); printf("fread n %d\tnnz %d\n", *n, *nnz); *m = *n; *colptr = intMalloc_dist(*n+1); *rowind = intMalloc_dist(*nnz); *nzval = doublecomplexMalloc_dist(*nnz); fread(*colptr, isize, (size_t) (*n + 1), fp); fread(*rowind, isize, (size_t) *nnz, fp); nnz_read = fread(*nzval, dsize, (size_t) (2 * (*nnz)), fp); printf("# of doubles fread: %d\n", nnz_read); 
fclose(fp); } int zwrite_binary(int_t n, int_t nnz, doublecomplex *values, int_t *rowind, int_t *colptr) { FILE *fp1; int nnz_written; size_t isize = sizeof(int_t), dsize = sizeof(double); fp1 = fopen("/scratch/scratchdirs/xiaoye/temp.bin", "wb"); fwrite(&n, isize, 1, fp1); fwrite(&nnz, isize, 1, fp1); fwrite(colptr, isize, n+1, fp1); fwrite(rowind, isize, nnz, fp1); nnz_written = fwrite(values, dsize, 2*nnz, fp1); printf("n %d, # of doublecomplex: %d\n", n, nnz); printf("dump binary file ... # of doubles fwrite: %d\n", nnz_written); assert(nnz_written == 2*nnz); fclose(fp1); } SuperLU_DIST_5.3.0/SRC/dmemory_dist.c0000644013363400111340000001053013233431301016125 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Memory utilities * *
 * -- Distributed SuperLU routine (version 4.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 * 
*/ #include "superlu_ddefs.h" /* Variables external to this file */ extern LU_stack_t stack; void *duser_malloc_dist(int_t bytes, int_t which_end) { void *buf; if ( StackFull(bytes) ) return (NULL); if ( which_end == HEAD ) { buf = (char*) stack.array + stack.top1; stack.top1 += bytes; } else { stack.top2 -= bytes; buf = (char*) stack.array + stack.top2; } stack.used += bytes; return buf; } void duser_free_dist(int_t bytes, int_t which_end) { if ( which_end == HEAD ) { stack.top1 -= bytes; } else { stack.top2 += bytes; } stack.used -= bytes; } /*! \brief * *
 * mem_usage consists of the following fields:
 *    - for_lu (float)
 *      The amount of space used in bytes for the L\U data structures.
 *    - total (float)
 *      The amount of space needed in bytes to perform factorization.
 *    - expansions (int)
 *      Number of memory expansions during the LU factorization.
 * 
*/ int_t dQuerySpace_dist(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, superlu_dist_mem_usage_t *mem_usage) { register int_t dword, gb, iword, k, nb, nsupers; int_t *index, *xsup; int iam, mycol, myrow; Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; iam = grid->iam; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); iword = sizeof(int_t); dword = sizeof(double); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; mem_usage->for_lu = 0.; /* For L factor */ nb = CEILING( nsupers, grid->npcol ); /* Number of local column blocks */ for (k = 0; k < nb; ++k) { gb = k * grid->npcol + mycol; /* Global block number. */ if ( gb < nsupers ) { index = Llu->Lrowind_bc_ptr[k]; if ( index ) { mem_usage->for_lu += (float) ((BC_HEADER + index[0]*LB_DESCRIPTOR + index[1]) * iword); mem_usage->for_lu += (float)(index[1]*SuperSize( gb )*dword); } } } /* For U factor */ nb = CEILING( nsupers, grid->nprow ); /* Number of local row blocks */ for (k = 0; k < nb; ++k) { gb = k * grid->nprow + myrow; /* Global block number. */ if ( gb < nsupers ) { index = Llu->Ufstnz_br_ptr[k]; if ( index ) { mem_usage->for_lu += (float)(index[2] * iword); mem_usage->for_lu += (float)(index[1] * dword); } } } /* Working storage to support factorization */ mem_usage->total = mem_usage->for_lu; #if 0 mem_usage->total += (float)(( Llu->bufmax[0] + Llu->bufmax[2] ) * iword + ( Llu->bufmax[1] + Llu->bufmax[3] + maxsup ) * dword ); /**** another buffer to use mpi_irecv in pdgstrf_irecv.c ****/ mem_usage->total += (float)( Llu->bufmax[0] * iword + Llu->bufmax[1] * dword ); mem_usage->total += (float)( maxsup * maxsup + maxsup) * iword; k = CEILING( nsupers, grid->nprow ); mem_usage->total += (float)(2 * k * iword); #else /*mem_usage->total += stat->current_buffer;*/ mem_usage->total += stat->peak_buffer; #if ( PRNTlevel>=1 ) if (iam==0) printf(".. 
dQuerySpace: peak_buffer %.2f (MB)\n", stat->peak_buffer * 1.0e-6); #endif #endif return 0; } /* dQuerySpace_dist */ /* * Allocate storage for original matrix A */ void dallocateA_dist(int_t n, int_t nnz, double **a, int_t **asub, int_t **xa) { *a = (double *) doubleMalloc_dist(nnz); *asub = (int_t *) intMalloc_dist(nnz); *xa = (int_t *) intMalloc_dist(n+1); } double *doubleMalloc_dist(int_t n) { double *buf; buf = (double *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(double) ); return (buf); } double *doubleCalloc_dist(int_t n) { double *buf; register int_t i; double zero = 0.0; buf = (double *) SUPERLU_MALLOC( SUPERLU_MAX(1, n) * sizeof(double)); if ( !buf ) return (buf); for (i = 0; i < n; ++i) buf[i] = zero; return (buf); } SuperLU_DIST_5.3.0/SRC/sp_ienv.c0000644013363400111340000000665413233431301015105 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Chooses machine-dependent parameters for the local environment */ /* * File name: sp_ienv.c * History: Modified from lapack routine ILAENV */ #include "superlu_ddefs.h" #include "machines.h" /*! \brief
    Purpose
    =======

    sp_ienv_dist() is called to choose machine-dependent parameters for the
    local environment. See ISPEC for a description of the parameters.

    This version provides a set of parameters which should give good,
    but not optimal, performance on many of the currently available
    computers.  Users are encouraged to modify this subroutine to set
    the tuning parameters for their particular machine using the option
    and problem size information in the arguments.

    Arguments
    =========

    ISPEC   (input) int
            Specifies the parameter to be returned as the value of SP_IENV_DIST.
            = 1: the panel size w; a panel consists of w consecutive
                 columns of matrix A in the process of Gaussian elimination.
                 The best value depends on the machine's cache characteristics.
            = 2: the relaxation parameter relax; if the number of
                 nodes (columns) in a subtree of the elimination tree is less
                 than relax, this subtree is considered as one supernode,
                 regardless of their row structures.
            = 3: the maximum size for a supernode, which must be greater
                 than or equal to the relaxation parameter (see case 2);
            = 4: the minimum row dimension for 2-D blocking to be used;
            = 5: the minimum column dimension for 2-D blocking to be used;
            = 6: the estimated fill factor for the adjacency structures
                 of L and U, compared with A;
            = 7: the minimum value of the product M*N*K for a GEMM call
                 to be off-loaded to accelerator (e.g., GPU, Xeon Phi).

   (SP_IENV_DIST) (output) int
            >= 0: the value of the parameter specified by ISPEC
            < 0:  if SP_IENV_DIST = -k, the k-th argument had an illegal value.

    =====================================================================
*/ #include #include int_t sp_ienv_dist(int_t ispec) { // printf(" this function called\n"); int i; char* ttemp; switch (ispec) { #if ( MACH==CRAY_T3E ) case 2: return (6); case 3: return (30); #elif ( MACH==IBM ) case 2: return (20); case 3: return (100); #else case 2: ttemp = getenv("NREL"); if(ttemp) { return(atoi(ttemp)); } else return 20; case 3: ttemp = getenv("NSUP"); if(ttemp) { return(atoi(ttemp)); } else return 128; #endif case 6: ttemp = getenv("FILL"); if ( ttemp ) return(atoi(ttemp)); else return (5); case 7: ttemp = getenv ("N_GEMM"); if (ttemp) return atoi (ttemp); else return 10000; } /* Invalid value for ISPEC */ i = 1; xerr_dist("sp_ienv", &i); return 0; } /* sp_ienv_dist */ SuperLU_DIST_5.3.0/SRC/pzgstrf2.c0000644013363400111340000003426613233431301015223 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs panel LU factorization. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * August 15, 2014
 *
 * Modified:
 *   September 30, 2017
 *
 * 
 * Purpose
 * =======
 *   Panel factorization -- block column k
 *
 *   Factor diagonal and subdiagonal blocks and test for exact singularity.
 *   Only the column processes that own block column *k* participate
 *   in the work.
 *
 * Arguments
 * =========
 * options (input) superlu_dist_options_t* (global)
 *         The structure defines the input parameters to control
 *         how the LU decomposition will be performed.
 *
 * k0     (input) int (global)
 *        Counter of the next supernode to be factorized.
 *
 * k      (input) int (global)
 *        The column number of the block column to be factorized.
 *
 * thresh (input) double (global)
 *        The threshold value = s_eps * anorm.
 *
 * Glu_persist (input) Glu_persist_t*
 *        Global data structures (xsup, supno) replicated on all processes.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * Llu    (input/output) LocalLU_t*
 *        Local data structures to store distributed L and U matrices.
 *
 * U_diag_blk_send_req (input/output) MPI_Request*
 *        List of send requests to send down the diagonal block of U.
 *
 * tag_ub (input) int
 *        Upper bound of MPI tag values.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the factorization.
 *        See SuperLUStat_t structure defined in util.h.
 *
 * info   (output) int*
 *        = 0: successful exit
 *        < 0: if info = -i, the i-th argument had an illegal value
 *        > 0: if info = i, U(i,i) is exactly zero. The factorization has
 *             been completed, but the factor U is exactly singular,
 *             and division by zero will occur if it is used to solve a
 *             system of equations.
 * 
*/ #include #include "superlu_zdefs.h" /* This pdgstrf2 is based on TRSM function */ void pzgstrf2_trsm (superlu_dist_options_t * options, int_t k0, int_t k, double thresh, Glu_persist_t * Glu_persist, gridinfo_t * grid, LocalLU_t * Llu, MPI_Request * U_diag_blk_send_req, int tag_ub, SuperLUStat_t * stat, int *info) { /* printf("entering pzgstrf2 %d \n", grid->iam); */ int cols_left, iam, l, pkk, pr; int incx = 1, incy = 1; int nsupr; /* number of rows in the block (LDA) */ int nsupc; /* number of columns in the block */ int luptr; int_t i, myrow, krow, j, jfst, jlst, u_diag_cnt; int_t *xsup = Glu_persist->xsup; doublecomplex *lusup, temp; doublecomplex *ujrow, *ublk_ptr; /* pointer to the U block */ doublecomplex one = {1.0, 0.0}, alpha = {-1.0, 0.0}; int_t Pr; MPI_Status status; MPI_Comm comm = (grid->cscp).comm; double t1, t2; /* Initialization. */ iam = grid->iam; Pr = grid->nprow; myrow = MYROW (iam, grid); krow = PROW (k, grid); pkk = PNUM (PROW (k, grid), PCOL (k, grid), grid); j = LBj (k, grid); /* Local block number */ jfst = FstBlockC (k); jlst = FstBlockC (k + 1); lusup = Llu->Lnzval_bc_ptr[j]; nsupc = SuperSize (k); if (Llu->Lrowind_bc_ptr[j]) nsupr = Llu->Lrowind_bc_ptr[j][1]; else nsupr = 0; #ifdef PI_DEBUG printf ("rank %d Iter %d k=%d \t ztrsm nsuper %d \n", iam, k0, k, nsupr); #endif ublk_ptr = ujrow = Llu->ujrow; luptr = 0; /* Point to the diagonal entries. */ cols_left = nsupc; /* supernode size */ int ld_ujrow = nsupc; /* leading dimension of ujrow */ u_diag_cnt = 0; incy = ld_ujrow; if ( U_diag_blk_send_req && U_diag_blk_send_req[myrow] != MPI_REQUEST_NULL ) { /* There are pending sends - wait for all Isend to complete */ #if ( PROFlevel>=1 ) TIC (t1); #endif for (pr = 0; pr < Pr; ++pr) { if (pr != myrow) { MPI_Wait (U_diag_blk_send_req + pr, &status); } } #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DIAG] += t2; #endif /* flag no more outstanding send request. 
*/ U_diag_blk_send_req[myrow] = MPI_REQUEST_NULL; } if (iam == pkk) { /* diagonal process */ /* ++++ First step compute diagonal block ++++++++++ */ for (j = 0; j < jlst - jfst; ++j) { /* for each column in panel */ /* Diagonal pivot */ i = luptr; if ( options->ReplaceTinyPivot == YES ) { if ( slud_z_abs1(&lusup[i]) < thresh && lusup[i].r != 0.0 && lusup[i].i != 0.0 ) { /* Diagonal */ #if ( PRNTlevel>=2 ) printf ("(%d) .. col %d, tiny pivot %e ", iam, jfst + j, lusup[i]); #endif /* Keep the new diagonal entry with the same sign. */ if ( lusup[i].r < 0 ) lusup[i].r = -thresh; else lusup[i].r = thresh; lusup[i].i = 0.0; #if ( PRNTlevel>=2 ) printf ("replaced by %e\n", lusup[i]); #endif ++(stat->TinyPivots); } } #if 0 for (l = 0; l < cols_left; ++l, i += nsupr, ++u_diag_cnt) ublk_ptr[u_diag_cnt] = lusup[i]; /* copy one row of U */ #endif /* storing U in full form */ int st; for (l = 0; l < cols_left; ++l, i += nsupr, ++u_diag_cnt) { st = j * ld_ujrow + j; ublk_ptr[st + l * ld_ujrow] = lusup[i]; /* copy one row of U */ } /* Test for singularity. */ if ( ujrow[0].r == 0.0 && ujrow[0].i == 0.0 ) { *info = j + jfst + 1; } else { /* Scale the j-th column within diag. block. */ slud_z_div(&temp, &one, &ujrow[0]); for (i = luptr + 1; i < luptr - j + nsupc; ++i) zz_mult(&lusup[i], &lusup[i], &temp); stat->ops[FACT] += 6*(nsupc-j-1) + 10; } /* Rank-1 update of the trailing submatrix within diag. block. */ if (--cols_left) { /* l = nsupr - j - 1; */ l = nsupc - j - 1; /* Piyush */ zgeru_(&l, &cols_left, &alpha, &lusup[luptr+1], &incx, &ujrow[ld_ujrow], &incy, &lusup[luptr + nsupr + 1], &nsupr); stat->ops[FACT] += 8 * l * cols_left; } /* ujrow = ublk_ptr + u_diag_cnt; */ ujrow = ujrow + ld_ujrow + 1; /* move to next row of U */ luptr += nsupr + 1; /* move to next column */ } /* for column j ... 
first loop */ /* ++++ Second step compute off-diagonal block with communication ++*/ ublk_ptr = ujrow = Llu->ujrow; if (U_diag_blk_send_req && iam == pkk) { /* Send the U block downward */ /** ALWAYS SEND TO ALL OTHERS - TO FIX **/ #if ( PROFlevel>=1 ) TIC (t1); #endif for (pr = 0; pr < Pr; ++pr) { if (pr != krow) { /* tag = ((k0<<2)+2) % tag_ub; */ /* tag = (4*(nsupers+k0)+2) % tag_ub; */ MPI_Isend (ublk_ptr, nsupc * nsupc, SuperLU_MPI_DOUBLE_COMPLEX, pr, SLU_MPI_TAG (4, k0) /* tag */ , comm, U_diag_blk_send_req + pr); } } #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DIAG] += t2; #endif /* flag outstanding Isend */ U_diag_blk_send_req[krow] = (MPI_Request) TRUE; /* Sherry */ } /* pragma below would be changed by an MKL call */ l = nsupr - nsupc; // n = nsupc; doublecomplex alpha = {1.0, 0.0}; #ifdef PI_DEBUG printf ("calling ztrsm\n"); printf ("ztrsm diagonal param 11: %d \n", nsupr); #endif #if defined (USE_VENDOR_BLAS) ztrsm_ ("R", "U", "N", "N", &l, &nsupc, &alpha, ublk_ptr, &ld_ujrow, &lusup[nsupc], &nsupr, 1, 1, 1, 1); #else ztrsm_ ("R", "U", "N", "N", &l, &nsupc, &alpha, ublk_ptr, &ld_ujrow, &lusup[nsupc], &nsupr); #endif stat->ops[FACT] += 4.0 * ((flops_t) nsupc * (nsupc+1) * l); } else { /* non-diagonal process */ /* ================================================================== * * Receive the diagonal block of U for panel factorization of L(:,k). 
* * Note: we block for panel factorization of L(:,k), but panel * * factorization of U(:,k) do not block * * ================================================================== */ /* tag = ((k0<<2)+2) % tag_ub; */ /* tag = (4*(nsupers+k0)+2) % tag_ub; */ // printf("hello message receiving%d %d\n",(nsupc*(nsupc+1))>>1,SLU_MPI_TAG(4,k0)); #if ( PROFlevel>=1 ) TIC (t1); #endif MPI_Recv (ublk_ptr, (nsupc * nsupc), SuperLU_MPI_DOUBLE_COMPLEX, krow, SLU_MPI_TAG (4, k0) /* tag */ , comm, &status); #if ( PROFlevel>=1 ) TOC (t2, t1); stat->utime[COMM] += t2; stat->utime[COMM_DIAG] += t2; #endif if (nsupr > 0) { doublecomplex alpha = {1.0, 0.0}; #ifdef PI_DEBUG printf ("ztrsm non diagonal param 11: %d \n", nsupr); if (!lusup) printf (" Rank :%d \t Empty block column occurred :\n", iam); #endif #if defined (USE_VENDOR_BLAS) ztrsm_ ("R", "U", "N", "N", &nsupr, &nsupc, &alpha, ublk_ptr, &ld_ujrow, lusup, &nsupr, 1, 1, 1, 1); #else ztrsm_ ("R", "U", "N", "N", &nsupr, &nsupc, &alpha, ublk_ptr, &ld_ujrow, lusup, &nsupr); #endif stat->ops[FACT] += 4.0 * ((flops_t) nsupc * (nsupc+1) * nsupr); } } /* end if pkk ... */ /* printf("exiting pzgstrf2 %d \n", grid->iam); */ } /* PZGSTRF2_trsm */ /************************************************************************/ void pzgstrs2_omp /************************************************************************/ (int_t k0, int_t k, Glu_persist_t * Glu_persist, gridinfo_t * grid, LocalLU_t * Llu, SuperLUStat_t * stat) { #ifdef PI_DEBUG printf("====Entering pzgstrs2==== \n"); #endif int iam, pkk; int incx = 1; int nsupr; /* number of rows in the block L(:,k) (LDA) */ int segsize; int nsupc; /* number of columns in the block */ int_t luptr, iukp, rukp; int_t b, gb, j, klst, knsupc, lk, nb; int_t *xsup = Glu_persist->xsup; int_t *usub; doublecomplex *lusup, *uval; #if 0 //#ifdef USE_VTUNE __SSC_MARK(0x111);// start SDE tracing, note uses 2 underscores __itt_resume(); // start VTune, again use 2 underscores #endif /* Quick return. 
*/ lk = LBi (k, grid); /* Local block number */ if (!Llu->Unzval_br_ptr[lk]) return; /* Initialization. */ iam = grid->iam; pkk = PNUM (PROW (k, grid), PCOL (k, grid), grid); //int k_row_cycle = k / grid->nprow; /* for which cycle k exist (to assign rowwise thread blocking) */ //int gb_col_cycle; /* cycle through block columns */ klst = FstBlockC (k + 1); knsupc = SuperSize (k); usub = Llu->Ufstnz_br_ptr[lk]; /* index[] of block row U(k,:) */ uval = Llu->Unzval_br_ptr[lk]; if (iam == pkk) { lk = LBj (k, grid); nsupr = Llu->Lrowind_bc_ptr[lk][1]; /* LDA of lusup[] */ lusup = Llu->Lnzval_bc_ptr[lk]; } else { nsupr = Llu->Lsub_buf_2[k0 % (1 + stat->num_look_aheads)][1]; /* LDA of lusup[] */ lusup = Llu->Lval_buf_2[k0 % (1 + stat->num_look_aheads)]; } /////////////////////new-test////////////////////////// /* !! Taken from Carl/SuperLU_DIST_5.1.0/EXAMPLE/pdgstrf2_v3.c !! */ /* Master thread: set up pointers to each block in the row */ nb = usub[0]; iukp = BR_HEADER; rukp = 0; int* blocks_index_pointers = SUPERLU_MALLOC (3 * nb * sizeof(int)); int* blocks_value_pointers = blocks_index_pointers + nb; int* nsupc_temp = blocks_value_pointers + nb; for (b = 0; b < nb; b++) { /* set up pointers to each block */ blocks_index_pointers[b] = iukp + UB_DESCRIPTOR; blocks_value_pointers[b] = rukp; gb = usub[iukp]; rukp += usub[iukp+1]; nsupc = SuperSize( gb ); nsupc_temp[b] = nsupc; iukp += (UB_DESCRIPTOR + nsupc); /* move to the next block */ } // Sherry: this version is more NUMA friendly compared to pdgstrf2_v2.c // https://stackoverflow.com/questions/13065943/task-based-programming-pragma-omp-task-versus-pragma-omp-parallel-for #pragma omp parallel for schedule(static) default(shared) \ private(b,j,iukp,rukp,segsize) /* Loop through all the blocks in the row. */ for (b = 0; b < nb; ++b) { iukp = blocks_index_pointers[b]; rukp = blocks_value_pointers[b]; /* Loop through all the segments in the block. 
*/ for (j = 0; j < nsupc_temp[b]; j++) { segsize = klst - usub[iukp++]; if (segsize) { #pragma omp task default(shared) firstprivate(segsize,rukp) if (segsize > 30) { /* Nonzero segment. */ int_t luptr = (knsupc - segsize) * (nsupr + 1); //printf("[2] segsize %d, nsupr %d\n", segsize, nsupr); #if defined (USE_VENDOR_BLAS) ztrsv_ ("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx, 1, 1, 1); #else ztrsv_ ("L", "N", "U", &segsize, &lusup[luptr], &nsupr, &uval[rukp], &incx); #endif } /* end task */ rukp += segsize; stat->ops[FACT] += segsize * (segsize + 1); } /* end if segsize > 0 */ } /* end for j in parallel ... */ /* #pragma omp taskwait */ } /* end for b ... */ /* Deallocate memory */ SUPERLU_FREE(blocks_index_pointers); #if 0 //#ifdef USE_VTUNE __itt_pause(); // stop VTune __SSC_MARK(0x222); // stop SDE tracing #endif } /* PZGSTRS2_omp */ SuperLU_DIST_5.3.0/SRC/pzgstrs1.c0000644013363400111340000006545613233431301015244 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of distributed linear equations * *
 * -- Distributed SuperLU routine (version 2.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 15, 2008
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 *     October 15, 2008  use fewer MPI_Reduce
 * 
*/ #include "superlu_zdefs.h" #define ISEND_IRECV /* * Function prototypes */ #ifdef _CRAY fortran void CTRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*); fortran void SGEMM(_fcd, _fcd, int*, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*, doublecomplex*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif /*! \brief * *
 * Purpose
 * =======
 *
 * PZGSTRS1 solves a system of distributed linear equations
 *
 *                   op( sub(A) ) * X = sub( B )
 *
 * with a general N-by-N distributed matrix sub( A ) using the LU
 * factorization computed by PZGSTRF.
 *
 * This routine is used only in the iterative refinement routine
 * pzgsrfs_ABXglobal, assuming that the right-hand side is already
 * distributed in the diagonal processes.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures to store L and U factors,
 *        and the permutation vectors.
 *        See superlu_zdefs.h for the definition of 'LUstruct_t' structure.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh.
 *
 * x      (input/output) doublecomplex*
 *        On entry, the right hand side matrix.
 *        On exit, the solution matrix if info = 0;
 *
 *        NOTE: the right-hand side matrix is already distributed on
 *              the diagonal processes.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves; 
 *        See SuperLUStat_t structure defined in util.h.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pzgstrs1(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, doublecomplex *x, int nrhs, SuperLUStat_t *stat, int *info) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; doublecomplex alpha = {1.0, 0.0}; doublecomplex zero = {0.0, 0.0}; doublecomplex *lsum; /* Local running sum of the updates to B-components */ doublecomplex *lusup, *dest; doublecomplex *recvbuf, *tempv; doublecomplex *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */ Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */ int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */ int iam, kcol, krow, mycol, myrow; int_t i, ii, il, j, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int_t Pc, Pr; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; doublecomplex **Lnzval_bc_ptr; MPI_Status status; #ifdef ISEND_IRECV MPI_Request *send_req, recv_req; #endif /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */ int_t nleaf = 0, nroot = 0; /*-- Counts used for U-solve --*/ int_t *bmod; /* Modification count for L-solve. */ int_t **bsendx_plist = Llu->bsendx_plist; int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */ int_t *brecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. 
*/ double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */ t = SuperLU_timer_(); /* Test input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -8; if ( *info ) { pxerr_dist("PZGSTRS1", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ Llu->SolveMsgSent = 0; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pzgstrs1()"); #endif /* Save the count to be altered so it can be used by subsequent call to PZGSTRS1. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; #ifdef ISEND_IRECV k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb; if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); #endif #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Compute ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. */ knsupc = sp_ienv_dist(3); if ( !(lsum = doublecomplexCalloc_dist(((size_t)ldalsum) * nrhs + nlb * LSUM_H)) ) ABORT("Calloc fails for lsum[]."); maxrecvsz = knsupc * nrhs + SUPERLU_MAX(XK_H, LSUM_H); if ( !(recvbuf = doublecomplexMalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doublecomplexCalloc_dist(maxrecvsz)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. 
*---------------------------------------------------*/ /* * Prepended the block number in the header for lsum[]. */ for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H].r = k; lsum[il - LSUM_H].i = 0; } } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol != kcol && fmod[lk] ) mod_bit[lk] = 1; /* contribution from off-diagonal */ } } /*PrintInt10("mod_bit", nlb, mod_bit);*/ /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, frecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* local block number */ kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* diagonal process */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; } } } #else /* old */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && fmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) frecv[%4d] %2d\n", iam, k, frecv[lk]); assert( frecv[lk] < Pc ); #endif } } } #endif } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. 
--------------------------------------------------------- */ #if ( DEBUGlevel>=2 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( !frecv[lk] && !fmod[lk] ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif /*stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs;*/ --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++]); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. 
*/ #if ( DEBUGlevel>=2 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. */ #ifdef ISEND_IRECV /* -MPI- FATAL: Remote protocol queue full */ MPI_Irecv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &recv_req ); MPI_Wait( &recv_req, &status ); #else MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); #endif k = (*recvbuf).r; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ zlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) z_add(&x[i + ii + j*knsupc], &x[i + ii + j*knsupc], &tempv[i + j*knsupc]); if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. 
*/ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif /*stat->ops[SOLVE] += knsupc * (knsupc - 1) * nrhs;*/ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications. */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( DEBUGlevel>=2 ) if ( !iam ) printf("\n.. 
After L-solve: y =\n"); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); } MPI_Barrier( grid->comm ); } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); Llu->SolveMsgSent = 0; #endif /*--------------------------------------------------- * Back solve Ux = y. * * The Y components from the forward solve is already * on the diagonal processes. *---------------------------------------------------*/ /* Save the count to be altered so it can be used by subsequent call to PZGSTRS1. */ if ( !(bmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for bmod[]."); for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i]; if ( !(brecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for brecv[]."); Llu->brecv = brecv; /* * Compute brecv[] and nbrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; #if 1 for (k = 0; k < nlb; ++k) mod_bit[k] = 0; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) mod_bit[lk] = 1; /* Contribution from off-diagonal */ } } /* Every process receives the count, but it is only useful on the diagonal processes. */ MPI_Allreduce( mod_bit, brecv, nlb, mpi_int_t, MPI_SUM, scp->comm ); for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol == kcol ) { /* Diagonal process. 
*/ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #else for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } #endif } /* Re-initialize lsum to zero. Each block header is already in place. */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { knsupc = SuperSize( k ); lk = LBi( k, grid ); il = LSUM_BLK( lk ); dest = &lsum[il]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = zero; } } /* Set up additional pointers for the index and value arrays of U. nlb is the number of local block rows. */ nub = CEILING( nsupers, Pc ); /* Number of local block columns. */ if ( !(Urbs = (int_t *) intCalloc_dist(2*((size_t)nub))) ) ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero blocks in a block column. */ Urbs1 = Urbs + nub; if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) ) ABORT("Malloc fails for Ucb_indptr[]"); if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) ) ABORT("Malloc fails for Ucb_valptr[]"); /* Count number of row blocks in a block column. One pass of the skeleton graph of U. */ for (lk = 0; lk < nlb; ++lk) { usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ /* usub[0] -- number of column blocks in this block row. */ #if ( DEBUGlevel>=2 ) Ublocks += usub[0]; #endif i = BR_HEADER; /* Pointer in index array. 
*/ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number */ ++Urbs[LBj(k,grid)]; i += UB_DESCRIPTOR + SuperSize( k ); } } } /* Set up the vertical linked lists for the row blocks. One pass of the skeleton graph of U. */ for (lb = 0; lb < nub; ++lb) if ( Urbs[lb] ) { /* Not an empty block column. */ if ( !(Ucb_indptr[lb] = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) ) ABORT("Malloc fails for Ucb_indptr[lb][]"); if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) ) ABORT("Malloc fails for Ucb_valptr[lb][]"); } for (lk = 0; lk < nlb; ++lk) { /* For each block row. */ usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ i = BR_HEADER; /* Pointer in index array. */ j = 0; /* Pointer in nzval array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number, column-wise. */ ljb = LBj( k, grid ); /* Local block number, column-wise. */ Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk; Ucb_indptr[ljb][Urbs1[ljb]].indpos = i; Ucb_valptr[ljb][Urbs1[ljb]] = j; ++Urbs1[ljb]; j += usub[i+1]; i += UB_DESCRIPTOR + SuperSize( k ); } } } #if ( DEBUGlevel>=2 ) for (p = 0; p < Pr*Pc; ++p) { if (iam == p) { printf("(%2d) .. Ublocks %d\n", iam, Ublocks); for (lb = 0; lb < nub; ++lb) { printf("(%2d) Local col %2d: # row blocks %2d\n", iam, lb, Urbs[lb]); if ( Urbs[lb] ) { for (i = 0; i < Urbs[lb]; ++i) printf("(%2d) .. row blk %2d:\ lbnum %d, indpos %d, valpos %d\n", iam, i, Ucb_indptr[lb][i].lbnum, Ucb_indptr[lb][i].indpos, Ucb_valptr[lb][i]); } } } MPI_Barrier( grid->comm ); } for (p = 0; p < Pr*Pc; ++p) { if ( iam == p ) { printf("\n(%d) bsendx_plist[][]", iam); for (lb = 0; lb < nub; ++lb) { printf("\n(%d) .. local col %2d: ", iam, lb); for (i = 0; i < Pr; ++i) printf("%4d", bsendx_plist[lb][i]); } printf("\n"); } MPI_Barrier( grid->comm ); } #endif /* DEBUGlevel */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. 
Setup U-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif /* * Solve the roots first by all the diagonal processes. */ #if ( DEBUGlevel>=2 ) printf("(%2d) nroot %4d\n", iam, nroot); #endif for (k = nsupers-1; k >= 0 && nroot; --k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process. */ knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number, row-wise. */ if ( !brecv[lk] && !bmod[lk] ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif /*stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs;*/ --nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ for (p = 0; p < Pr; ++p) if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications: lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) zlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if root ... */ } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. */ while ( nbrecvx || nbrecvmod ) { /* While not finished. 
*/ /* Receive a message. */ MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = (*recvbuf).r; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nbrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ zlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); break; case LSUM: --nbrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) z_add(&x[i + ii + j*knsupc], &x[i + ii + j*knsupc], &tempv[i + j*knsupc]); if ( !(--brecv[lk]) && !bmod[lk] ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #elif defined (USE_VENDOR_BLAS) ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1); #else ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif /*stat->ops[SOLVE] += knsupc * (knsupc + 1) * nrhs;*/ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[Llu->SolveMsgSent++] ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii - XK_H], pi); #endif } /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) zlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); } /* if becomes solvable */ break; #if ( DEBUGlevel>=2 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", iam, status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. U-solve time\t%8.2f\n", t); #endif stat->utime[SOLVE] = SuperLU_timer_() - t; /* Deallocate storage. */ SUPERLU_FREE(lsum); SUPERLU_FREE(recvbuf); for (i = 0; i < nub; ++i) if ( Urbs[i] ) { SUPERLU_FREE(Ucb_indptr[i]); SUPERLU_FREE(Ucb_valptr[i]); } SUPERLU_FREE(Ucb_indptr); SUPERLU_FREE(Ucb_valptr); SUPERLU_FREE(Urbs); SUPERLU_FREE(bmod); SUPERLU_FREE(brecv); #ifdef ISEND_IRECV for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]); SUPERLU_FREE(send_req); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pzgstrs1()"); #endif } /* PZGSTRS1 */ SuperLU_DIST_5.3.0/SRC/wingetopt.c0000644013363400111340000000255513233431301015456 0ustar xiaoyessg/* ***************************************************************** * * Copyright 2016 Microsoft * * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Obtained from https://github.com/iotivity/iotivity/tree/master/resource/c_common/windows * ******************************************************************/ #include "wingetopt.h" #include char* optarg = NULL; int optind = 1; int getopt(int argc, char *const argv[], const char *optstring) { if ((optind >= argc) || (argv[optind][0] != '-') || (argv[optind][0] == 0)) { return -1; } int opt = argv[optind][1]; const char *p = strchr(optstring, opt); if (p == NULL) { return '?'; } if (p[1] == ':') { optind++; if (optind >= argc) { return '?'; } optarg = argv[optind]; optind++; } return opt; } SuperLU_DIST_5.3.0/SRC/wingetopt.h0000644013363400111340000000201713233431301015454 0ustar xiaoyessg/* ***************************************************************** * * Copyright 2016 Microsoft * * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * Obtained from https://github.com/iotivity/iotivity/tree/master/resource/c_common/windows * ******************************************************************/ #ifndef WINGETOPT_H__ #define WINGETOPT_H__ #ifdef __cplusplus extern "C" { #endif extern char *optarg; extern int optind; int getopt(int argc, char *const argv[], const char *optstring); #ifdef __cplusplus } #endif #endif SuperLU_DIST_5.3.0/SRC/dSchCompUdt-2Ddynamic.c0000644013363400111340000006032613233431301017423 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief This file contains the main loop of pdgstrf which involves rank k * update of the Schur complement. * Uses 2D partitioning for the scatter phase. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 1, 2014
 *
 * Modified: September 14, 2017
 *   - First gather U-panel, then depending on "ldu" (excluding leading zeros), 
 *     gather only trailing columns of the L-panel corresponding to the nonzero
 *     of U-rows.
 *   - Padding zeros for nice dimensions of GEMM.
 *
 */

#define SCHEDULE_STRATEGY guided 

/* 
 * Buffers:
 *     [ lookAhead_L_buff | Remain_L_buff ] : stores the gathered L-panel
 *                                            (A matrix in C := A*B )
 *     bigU : stores the U-panel (B matrix in C := A*B)
 *     bigV : stores the block GEMM result (C matrix in C := A*B)
 */

if ( msg0 && msg2 ) { /* L(:,k) and U(k,:) are not empty. */
    int cum_nrow = 0; /* cumulative number of nonzero rows in L(:,k) */
    int temp_nbrow;   /* nonzero rows in current block L(i,k) */
    lptr  = lptr0;
    luptr = luptr0;
    int Lnbrow, Rnbrow; /* number of nonzero rows in look-ahead window,
			   and remaining part.  */

    /*******************************************************************
     * Separating L blocks into the top part within look-ahead window
     * and the remaining ones.
     *******************************************************************/

     int lookAheadBlk=0, RemainBlk=0;

     tt_start = SuperLU_timer_();

     /* Sherry -- can this loop be threaded?? */
     /* Loop through all blocks in L(:,k) to set up pointers to the start 
      * of each block in the data arrays.
      *   - lookAheadFullRow[i] := number of nonzero rows from block 0 to i
      *   - lookAheadStRow[i] := number of nonzero rows before block i
      *   - lookAhead_lptr[i] := point to the start of block i in L's index[] 
      *   - (ditto Remain_Info[i])
      */
     for (int i = 0; i < nlb; ++i) {
	 ib = lsub[lptr];            /* Block number of L(i,k). */
	 temp_nbrow = lsub[lptr+1];  /* Number of full rows. */
        
	 int look_up_flag = 1; /* assume ib is outside look-up window */
	 for (int j = k0+1; j < SUPERLU_MIN (k0 + num_look_aheads+2, nsupers );
	      ++j) {
		 if ( ib == perm_c_supno[j] ) {
		     look_up_flag = 0; /* flag ib within look-up window */
                     break;            /* Sherry -- can exit the loop?? */
                 }
	 }
	 
	 if ( look_up_flag == 0 ) { /* ib is within look-up window */
	     if (lookAheadBlk==0) {
		 lookAheadFullRow[lookAheadBlk] = temp_nbrow;
	     } else {
		 lookAheadFullRow[lookAheadBlk] = 
		     temp_nbrow + lookAheadFullRow[lookAheadBlk-1];   
	     }
	     lookAheadStRow[lookAheadBlk] = cum_nrow;
	     lookAhead_lptr[lookAheadBlk] = lptr;
	     lookAhead_ib[lookAheadBlk] = ib; 
	     lookAheadBlk++;
	 } else { /* ib is not in look-up window */
	     if ( RemainBlk==0 ) {
		 Remain_info[RemainBlk].FullRow = temp_nbrow;
	     } else {
		 Remain_info[RemainBlk].FullRow = 
		     temp_nbrow + Remain_info[RemainBlk-1].FullRow;   
	     }
             RemainStRow[RemainBlk] = cum_nrow;
             // Remain_lptr[RemainBlk] = lptr;
	     Remain_info[RemainBlk].lptr = lptr;
	     // Remain_ib[RemainBlk] = ib; 
	     Remain_info[RemainBlk].ib = ib; 
	     RemainBlk++;
	 }
	 
         cum_nrow += temp_nbrow;
	 
	 lptr += LB_DESCRIPTOR;  /* Skip descriptor. */
	 lptr += temp_nbrow;     /* Move to next block */
	 luptr += temp_nbrow;
     }  /* for i ... set up pointers for all blocks in L(:,k) */

     lptr = lptr0;
     luptr = luptr0;

     /* leading dimension of L look-ahead buffer, same as Lnbrow */
     //int LDlookAhead_LBuff = lookAheadBlk==0 ? 0 :lookAheadFullRow[lookAheadBlk-1];
     Lnbrow = lookAheadBlk==0 ? 0 : lookAheadFullRow[lookAheadBlk-1];
     /* leading dimension of L remaining buffer, same as Rnbrow */
     //int LDRemain_LBuff = RemainBlk==0 ? 0 : Remain_info[RemainBlk-1].FullRow;
     Rnbrow = RemainBlk==0 ? 0 : Remain_info[RemainBlk-1].FullRow;
     /* assert( cum_nrow == (LDlookAhead_LBuff + LDRemain_LBuff) );*/
     /* Piyush fix */
     //int LDlookAhead_LBuff = lookAheadBlk==0? 0 : lookAheadFullRow[lookAheadBlk-1];

     nbrow = Lnbrow + Rnbrow; /* total number of rows in L */
     LookAheadRowSepMOP += 2*knsupc*(nbrow);

     /***********************************************
      * Gather U blocks (AFTER LOOK-AHEAD WINDOW)   *
      ***********************************************/
     tt_start = SuperLU_timer_();

     if ( nbrow > 0 ) { /* L(:,k) is not empty */
	 /*
	  * Counting U blocks
	  */
	 ldu = 0; /* Calculate ldu for U(k,:) after look-ahead window. */
	 ncols = 0; /* Total number of nonzero columns in U(k,:) */
	 int temp_ncols = 0;

	 /* jj0 contains the look-ahead window that was updated in 
	    dlook_ahead_update.c. Now the search can continue from that point,
	    not to start from block 0. */
#if 0
	 iukp = iukp0; /* point to the first block in index[] */
	 rukp = rukp0; /* point to the start of nzval[] */
#else
	 /* Save pointers at location right after look-ahead window
	    for later restart. */
	 iukp0 = iukp;
	 rukp0 = rukp;
#endif

	 /* if ( iam==0 ) printf("--- k0 %d, k %d, jj0 %d, nub %d\n", k0, k, jj0, nub);*/
	     
         /* 
	  * Loop through all blocks in U(k,:) to set up pointers to the start
          * of each block in the data arrays, store them in Ublock_info[j]
          * for block U(k,j).
  	  */
	 for (j = jj0; j < nub; ++j) { /* jj0 starts after look-ahead window. */
	     temp_ncols = 0;
#if 0
	     /* Sherry - can remove following call, since perm_u == Identity  */
	     arrive_at_ublock(
			      j, &iukp, &rukp, &jb, &ljb, &nsupc,
			      iukp0, rukp0, usub, perm_u, xsup, grid
			      );
#else
	     jb = usub[iukp];
	     /* ljb = LBj (jb, grid);   Local block number of U(k,j). */
	     nsupc = SuperSize(jb);
	     iukp += UB_DESCRIPTOR; /* Start fstnz of block U(k,j). */
#endif
	     Ublock_info[j].iukp = iukp;
	     Ublock_info[j].rukp = rukp;
	     Ublock_info[j].jb = jb;

	     /* if ( iam==0 )
		 printf("j %d: Ublock_info[j].iukp %d, Ublock_info[j].rukp %d,"
			"Ublock_info[j].jb %d, nsupc %d\n", 
			j, Ublock_info[j].iukp, Ublock_info[j].rukp,
			Ublock_info[j].jb, nsupc); */

	     /* Prepare to call GEMM. */
	     jj = iukp;
	     for (; jj < iukp+nsupc; ++jj) {
		 segsize = klst - usub[jj];
		 if ( segsize ) {
                    ++temp_ncols;
                    if ( segsize > ldu ) ldu = segsize;
		 }
	     }

	     Ublock_info[j].full_u_cols = temp_ncols;
	     ncols += temp_ncols;
#if 1	     
	     /* Jump number of nonzeros in block U(k,jj);
		Move to block U(k,j+1) in nzval[] array.  */
	     rukp += usub[iukp - 1];
	     iukp += nsupc;
#endif
         } /* end for j ... compute ldu & ncols */

	 /* Now doing prefix sum on full_u_cols.
	  * After this, full_u_cols is the number of nonzero columns
          * from block 0 to block j.
          */
	 for ( j = jj0+1; j < nub; ++j) {
	     Ublock_info[j].full_u_cols += Ublock_info[j-1].full_u_cols;
	 }
            
	 /* Padding zeros to make {m,n,k} multiple of vector length. */
	 jj = 8; //n;
	 if (gemm_padding > 0 && Rnbrow > jj && ncols > jj && ldu > jj) {
	     gemm_m_pad = Rnbrow + (Rnbrow % GEMM_PADLEN);
	     gemm_n_pad = ncols + (ncols % GEMM_PADLEN);
	     //gemm_n_pad = ncols;
	     //gemm_k_pad = ldu + (ldu % GEMM_PADLEN);
	     gemm_k_pad = ldu;
	     
	     for (i = Rnbrow; i < gemm_m_pad; ++i)  // padding A matrix
		 for (j = 0; j < gemm_k_pad; ++j)
		     Remain_L_buff[i + j*gemm_m_pad] = zero;
	     for (i = 0; i < Rnbrow; ++i)         
		 for (j = ldu; j < gemm_k_pad; ++j)
		     Remain_L_buff[i + j*gemm_m_pad] = zero;
	     for (i = ldu; i < gemm_k_pad; ++i)     // padding B matrix
		 for (j = 0; j < gemm_n_pad; ++j)
		     bigU[i + j*gemm_k_pad] = zero;
	     for (i = 0; i < ldu; ++i)
		 for (j = ncols; j < gemm_n_pad; ++j)
		     bigU[i + j*gemm_k_pad] = zero;
	 } else {
	     gemm_m_pad = Rnbrow;
	     gemm_n_pad = ncols;
	     gemm_k_pad = ldu;
	 }
     
	 tempu = bigU; /* buffer the entire row block U(k,:) */

         /* Gather U(k,:) into buffer bigU[] to prepare for GEMM */
#ifdef _OPENMP
#pragma omp parallel for firstprivate(iukp, rukp) \
    private(j,tempu, jb, nsupc,ljb,segsize, lead_zero, jj, i) \
    default (shared) schedule(SCHEDULE_STRATEGY)
#endif
        for (j = jj0; j < nub; ++j) { /* jj0 starts after look-ahead window. */

            if (j==jj0) tempu = bigU;
            //else tempu = bigU + ldu * Ublock_info[j-1].full_u_cols;
            else tempu = bigU + gemm_k_pad * Ublock_info[j-1].full_u_cols;

            /* == processing each of the remaining columns in parallel == */
#if 0
	    /* Sherry - can remove following call, since perm_u == Identity  */
            arrive_at_ublock(j, &iukp, &rukp, &jb, &ljb, &nsupc,
			     iukp0, rukp0, usub,perm_u, xsup, grid);
#else
	    iukp = Ublock_info[j].iukp;
	    rukp = Ublock_info[j].rukp;
	    jb = Ublock_info[j].jb;
	    nsupc = SuperSize (jb );
#endif
            /* Copy from U(k,j) to tempu[], padding zeros.  */            
            for (jj = iukp; jj < iukp+nsupc; ++jj) {
                segsize = klst - usub[jj];
                if ( segsize ) {
                    lead_zero = ldu - segsize;
                    for (i = 0; i < lead_zero; ++i) tempu[i] = zero;
		    //tempu += lead_zero;
#if (_OPENMP>=201307)
#pragma omp simd
#endif
		    for (i=0; i0), end gather U blocks */

    GatherUTimer += SuperLU_timer_() - tt_start;
    GatherMOP += 2*ldu*ncols;
    int jj_cpu = nub;       /* limit between CPU and GPU */
    int thread_id;
    /*tempv = bigV;*/


    /**********************
     * Gather L blocks    *
     **********************/
     tt_start = SuperLU_timer_();

     /* Loop through the look-ahead blocks to copy Lval into the buffer */
#ifdef _OPENMP
#pragma omp parallel for private(j,jj,tempu,tempv) default (shared)
#endif
     for (i = 0; i < lookAheadBlk; ++i) {
	 int StRowDest, temp_nbrow;
	 if ( i==0 ) {
	     StRowDest = 0;
	     temp_nbrow = lookAheadFullRow[0];
	 } else {
	     StRowDest   = lookAheadFullRow[i-1];
	     temp_nbrow  = lookAheadFullRow[i]-lookAheadFullRow[i-1];
	 }
	 
	 int StRowSource = lookAheadStRow[i];
	 
	 /* Now copying one block into L lookahead buffer */
	 /* #pragma omp parallel for (gives slow down) */
	 // for (int j = 0; j < knsupc; ++j) { 
	 for (j = knsupc-ldu; j < knsupc; ++j) { /* skip leading columns
						    corresponding to zero U rows */
#if 1
	     /* Better let compiler generate memcpy or vectorized code. */
	     //tempu = &lookAhead_L_buff[StRowDest + j*LDlookAhead_LBuff];
	     //tempu = &lookAhead_L_buff[StRowDest + j * Lnbrow];
	     tempu = &lookAhead_L_buff[StRowDest + (j - (knsupc-ldu)) * Lnbrow];
	     tempv = &lusup[luptr+j*nsupr + StRowSource];
#if (_OPENMP>=201307)
#pragma omp simd
#endif
	     for (jj = 0; jj < temp_nbrow; ++jj) tempu[jj] = tempv[jj];
#else
	     //memcpy(&lookAhead_L_buff[StRowDest + j*LDlookAhead_LBuff],
	     memcpy(&lookAhead_L_buff[StRowDest + (j - (knsupc-ldu)) * Lnbrow],
		    &lusup[luptr+j*nsupr + StRowSource],
		    temp_nbrow * sizeof(double) );
#endif
	 } /* end for j ... */
     } /* parallel for i ... gather Lval blocks from lookahead window */

     /* Loop through the remaining blocks to copy Lval into the buffer */
#ifdef _OPENMP
#pragma omp parallel for private(i,j,jj,tempu,tempv) default (shared) \
    schedule(SCHEDULE_STRATEGY)
#endif
     for (int i = 0; i < RemainBlk; ++i) {
         int StRowDest, temp_nbrow;
         if ( i==0 )  {
	     StRowDest  = 0;
	     temp_nbrow = Remain_info[0].FullRow;
	 } else  {
	     StRowDest   = Remain_info[i-1].FullRow;
	     temp_nbrow  = Remain_info[i].FullRow - Remain_info[i-1].FullRow;
	 }

	 int StRowSource = RemainStRow[i];

	 /* Now copying a block into L remaining buffer */
	 // #pragma omp parallel for (gives slow down)
	 // for (int j = 0; j < knsupc; ++j) {
	 for (int j = knsupc-ldu; j < knsupc; ++j) {
	     // printf("StRowDest %d Rnbrow %d StRowSource %d \n", StRowDest,Rnbrow ,StRowSource);
#if 1
	     /* Better let compiler generate memcpy or vectorized code. */
	     //tempu = &Remain_L_buff[StRowDest + j*LDRemain_LBuff];
	     //tempu = &Remain_L_buff[StRowDest + (j - (knsupc-ldu)) * Rnbrow];
	     tempu = &Remain_L_buff[StRowDest + (j - (knsupc-ldu)) * gemm_m_pad];
	     tempv = &lusup[luptr + j*nsupr + StRowSource];
#if (_OPENMP>=201307)
#pragma omp simd
#endif
	     for (jj = 0; jj < temp_nbrow; ++jj) tempu[jj] = tempv[jj];
#else
	     //memcpy(&Remain_L_buff[StRowDest + j*LDRemain_LBuff],
	     memcpy(&Remain_L_buff[StRowDest + (j - (knsupc-ldu)) * gemm_m_pad],
		    &lusup[luptr+j*nsupr + StRowSource],
                    temp_nbrow * sizeof(double) );
#endif
	 } /* end for j ... */
     } /* parallel for i ... copy Lval into the remaining buffer */

     tt_end = SuperLU_timer_();
     GatherLTimer += tt_end - tt_start;


     /*************************************************************************
      * Perform GEMM (look-ahead L part, and remain L part) followed by Scatter
      *************************************************************************/
     tempu = bigU;  /* setting to the start of padded U(k,:) */
    
     if ( Lnbrow>0 && ldu>0 && ncols>0 ) { /* Both L(:,k) and U(k,:) nonempty */
	 /***************************************************************
	  * Updating blocks in look-ahead window of the LU(look-ahead-rows,:)
	  ***************************************************************/

	 /* Count flops for total GEMM calls */
	 ncols = Ublock_info[nub-1].full_u_cols;
 	 flops_t flps = 2.0 * (flops_t)Lnbrow * ldu * ncols;
	 LookAheadScatterMOP += 3 * Lnbrow * ncols; /* scatter-add */
	 schur_flop_counter += flps;
	 stat->ops[FACT]    += flps;
	 LookAheadGEMMFlOp  += flps;

#ifdef _OPENMP
#pragma omp parallel default (shared) private(thread_id)
	 {
	   thread_id = omp_get_thread_num();
 
	   /* Ideally, should organize the loop as:
	      for (j = 0; j < nub; ++j) {
	          for (lb = 0; lb < lookAheadBlk; ++lb) {
	               L(lb,k) X U(k,j) -> tempv[]
		  }
	      }
	      But now, we use collapsed loop to achieve more parallelism.
	      Total number of block updates is:
	      (# of lookAheadBlk in L(:,k)) X (# of blocks in U(k,:))
	   */

	   int i = sizeof(int);
	   int* indirect_thread    = indirect + (ldt + CACHELINE/i) * thread_id;
	   int* indirect2_thread   = indirect2 + (ldt + CACHELINE/i) * thread_id;

#pragma omp for \
    private (nsupc,ljb,lptr,ib,temp_nbrow,cum_nrow)	\
    schedule(dynamic)
#else /* not use _OPENMP */
	   thread_id = 0;
	   int* indirect_thread    = indirect;
	   int* indirect2_thread   = indirect2;
#endif
	   /* Each thread is assigned one loop index ij, responsible for 
	      block update L(lb,k) * U(k,j) -> tempv[]. */
	   for (int ij = 0; ij < lookAheadBlk*(nub-jj0); ++ij) {
	       /* jj0 starts after look-ahead window. */
            int j   = ij/lookAheadBlk + jj0;
            int lb  = ij%lookAheadBlk;

            /* Getting U block U(k,j) information */
            /* unsigned long long ut_start, ut_end; */
            int_t rukp =  Ublock_info[j].rukp;
            int_t iukp =  Ublock_info[j].iukp;
            int jb   =  Ublock_info[j].jb;
            int nsupc = SuperSize(jb);
            int ljb = LBj (jb, grid);  /* destination column block */
            int st_col;
            int ncols;  /* Local variable counts only columns in the block */
            if ( j > jj0 ) { /* jj0 starts after look-ahead window. */
                ncols  = Ublock_info[j].full_u_cols-Ublock_info[j-1].full_u_cols;
                st_col = Ublock_info[j-1].full_u_cols;
            } else {
                ncols  = Ublock_info[j].full_u_cols;
                st_col = 0;   
            }

            /* Getting L block L(i,k) information */
            int_t lptr = lookAhead_lptr[lb];
            int ib   = lookAhead_ib[lb];
            int temp_nbrow = lsub[lptr+1];
            lptr += LB_DESCRIPTOR;
            int cum_nrow = (lb==0 ? 0 : lookAheadFullRow[lb-1]);

	    /* Block-by-block GEMM in look-ahead window */
#if 0
	    i = sizeof(double);
	    double* tempv1 = bigV + thread_id * (ldt*ldt + CACHELINE/i);
#else
	    double* tempv1 = bigV + thread_id * (ldt*ldt);
#endif

#if ( PRNTlevel>= 1)
	    if (thread_id == 0) tt_start = SuperLU_timer_();
	    gemm_max_m = SUPERLU_MAX(gemm_max_m, temp_nbrow);
	    gemm_max_n = SUPERLU_MAX(gemm_max_n, ncols);
	    gemm_max_k = SUPERLU_MAX(gemm_max_k, ldu);
#endif

#if defined (USE_VENDOR_BLAS)            
            dgemm_("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
		   //&lookAhead_L_buff[(knsupc-ldu)*Lnbrow+cum_nrow], &Lnbrow,
		   &lookAhead_L_buff[cum_nrow], &Lnbrow,
		   &tempu[st_col*ldu], &ldu, &beta, tempv1, &temp_nbrow, 1, 1);
#else
            dgemm_("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
		   //&lookAhead_L_buff[(knsupc-ldu)*Lnbrow+cum_nrow], &Lnbrow,
		   &lookAhead_L_buff[cum_nrow], &Lnbrow,
		   &tempu[st_col*ldu], &ldu, &beta, tempv1, &temp_nbrow);
#endif

#if (PRNTlevel>=1 )
	    if (thread_id == 0) {
		tt_end = SuperLU_timer_();
		LookAheadGEMMTimer += tt_end - tt_start;
		tt_start = tt_end;
	    }
#endif
            if ( ib < jb ) {
                dscatter_u (
				 ib, jb,
				 nsupc, iukp, xsup,
				 klst, temp_nbrow,
				 lptr, temp_nbrow, lsub,
				 usub, tempv1,
				 Ufstnz_br_ptr, Unzval_br_ptr,
				 grid
			        );
            } else {
#if 0
		//#ifdef USE_VTUNE
	    __SSC_MARK(0x111);// start SDE tracing, note uses 2 underscores
	    __itt_resume(); // start VTune, again use 2 underscores
#endif
                dscatter_l (
				 ib, ljb, 
				 nsupc, iukp, xsup,
 				 klst, temp_nbrow,
				 lptr, temp_nbrow,
				 usub, lsub, tempv1,
				 indirect_thread, indirect2_thread,
				 Lrowind_bc_ptr, Lnzval_bc_ptr,
				 grid
				);
#if 0
		//#ifdef USE_VTUNE
		__itt_pause(); // stop VTune
		__SSC_MARK(0x222); // stop SDE tracing
#endif
            }

#if ( PRNTlevel>=1 )
	    if (thread_id == 0) 
		LookAheadScatterTimer += SuperLU_timer_() - tt_start;
#endif
	   } /* end omp for ij = ... */

#ifdef _OPENMP
	 } /* end omp parallel */
#endif
     } /* end if Lnbrow>0 ... look-ahead GEMM and scatter */

    /***************************************************************
     * Updating remaining rows and columns on CPU.
     ***************************************************************/
    ncols = jj_cpu==0 ? 0 : Ublock_info[jj_cpu-1].full_u_cols;

    if ( Rnbrow>0 && ldu>0 ) { /* There are still blocks remaining ... */
	double flps = 2.0 * (double)Rnbrow * ldu * ncols;
	schur_flop_counter  += flps;
	stat->ops[FACT]     += flps;

#if ( PRNTlevel>=1 )
	RemainGEMM_flops += flps;
	gemm_max_m = SUPERLU_MAX(gemm_max_m, Rnbrow);
	gemm_max_n = SUPERLU_MAX(gemm_max_n, ncols);
	gemm_max_k = SUPERLU_MAX(gemm_max_k, ldu);
	tt_start = SuperLU_timer_();
	/* printf("[%d] .. k0 %d, before large GEMM: %d-%d-%d, RemainBlk %d\n",
	   iam, k0,Rnbrow,ldu,ncols,RemainBlk);  fflush(stdout);
	assert( Rnbrow*ncols < bigv_size ); */
#endif
	/* calling aggregated large GEMM, result stored in bigV[]. */
#if defined (USE_VENDOR_BLAS)
	//dgemm_("N", "N", &Rnbrow, &ncols, &ldu, &alpha,
	dgemm_("N", "N", &gemm_m_pad, &gemm_n_pad, &gemm_k_pad, &alpha,
	       //&Remain_L_buff[(knsupc-ldu)*Rnbrow], &Rnbrow,
	       &Remain_L_buff[0], &gemm_m_pad,
	       &bigU[0], &gemm_k_pad, &beta, bigV, &gemm_m_pad, 1, 1);
#else
	//dgemm_("N", "N", &Rnbrow, &ncols, &ldu, &alpha,
	dgemm_("N", "N", &gemm_m_pad, &gemm_n_pad, &gemm_k_pad, &alpha,
	       //&Remain_L_buff[(knsupc-ldu)*Rnbrow], &Rnbrow,
	       &Remain_L_buff[0], &gemm_m_pad,
	       &bigU[0], &gemm_k_pad, &beta, bigV, &gemm_m_pad);
#endif

#if ( PRNTlevel>=1 )
	tt_end = SuperLU_timer_();
	RemainGEMMTimer += tt_end - tt_start;
#if ( PROFlevel>=1 )
	//fprintf(fgemm, "%8d%8d%8d %16.8e\n", Rnbrow, ncols, ldu,
	// (tt_end - tt_start)*1e6); // time in microsecond
	//fflush(fgemm);
	gemm_stats[gemm_count].m = Rnbrow;
	gemm_stats[gemm_count].n = ncols;
	gemm_stats[gemm_count].k = ldu;
	gemm_stats[gemm_count++].microseconds = (tt_end - tt_start) * 1e6;
#endif
	tt_start = SuperLU_timer_();
#endif

#ifdef USE_VTUNE
	__SSC_MARK(0x111);// start SDE tracing, note uses 2 underscores
	__itt_resume(); // start VTune, again use 2 underscores
#endif

	/* Scatter into destination block-by-block. */
#ifdef _OPENMP
#pragma omp parallel default(shared) private(thread_id)
	{
	    thread_id = omp_get_thread_num();
 
	    /* Ideally, should organize the loop as:
               for (j = 0; j < jj_cpu; ++j) {
	           for (lb = 0; lb < RemainBlk; ++lb) {
	               L(lb,k) X U(k,j) -> tempv[]
                   }
               }
	       But now, we use collapsed loop to achieve more parallelism.
	       Total number of block updates is:
	       (# of RemainBlk in L(:,k)) X (# of blocks in U(k,:))
	    */

	    int i = sizeof(int);
	    int* indirect_thread = indirect + (ldt + CACHELINE/i) * thread_id;
	    int* indirect2_thread = indirect2 + (ldt + CACHELINE/i) * thread_id;

#pragma omp for \
    private (j,lb,rukp,iukp,jb,nsupc,ljb,lptr,ib,temp_nbrow,cum_nrow)	\
    schedule(dynamic)
#else /* not use _OPENMP */
	    thread_id = 0;
	    int* indirect_thread = indirect;
	    int* indirect2_thread = indirect2;
#endif
	    /* Each thread is assigned one loop index ij, responsible for 
	       block update L(lb,k) * U(k,j) -> tempv[]. */
	    for (int ij = 0; ij < RemainBlk*(jj_cpu-jj0); ++ij) {
		/* jj_cpu := nub, jj0 starts after look-ahead window. */
		int j   = ij / RemainBlk + jj0; /* j-th block in U panel */
		int lb  = ij % RemainBlk;       /* lb-th block in L panel */

		/* Getting U block U(k,j) information */
		/* unsigned long long ut_start, ut_end; */
		int_t rukp =  Ublock_info[j].rukp;
		int_t iukp =  Ublock_info[j].iukp;
		int jb   =  Ublock_info[j].jb;
		int nsupc = SuperSize(jb);
		int ljb = LBj (jb, grid);
		int st_col;
		int ncols;
		if ( j>jj0 ) {
		    ncols = Ublock_info[j].full_u_cols - Ublock_info[j-1].full_u_cols;
		    st_col = Ublock_info[j-1].full_u_cols;
		} else {
		    ncols = Ublock_info[j].full_u_cols;
		    st_col = 0;   
		}

		/* Getting L block L(i,k) information */
		int_t lptr = Remain_info[lb].lptr;
		int ib   = Remain_info[lb].ib;
		int temp_nbrow = lsub[lptr+1];
		lptr += LB_DESCRIPTOR;
		int cum_nrow = (lb==0 ? 0 : Remain_info[lb-1].FullRow);
		
		/* tempv1 points to block(i,j) in bigV : LDA == Rnbrow */
		//double* tempv1 = bigV + (st_col * Rnbrow + cum_nrow); Sherry 
		double* tempv1 = bigV + (st_col * gemm_m_pad + cum_nrow); /* Sherry */

		// printf("[%d] .. before scatter: ib %d, jb %d, temp_nbrow %d, Rnbrow %d\n", iam, ib, jb, temp_nbrow, Rnbrow); fflush(stdout);

		/* Now scattering the block */

		if ( ib < jb ) {
		    dscatter_u (
				ib, jb,
				nsupc, iukp, xsup,
				//klst, Rnbrow, /*** klst, temp_nbrow, Sherry */
				klst, gemm_m_pad, /*** klst, temp_nbrow, Sherry */
				lptr, temp_nbrow, /* row dimension of the block */
				lsub, usub, tempv1,
				Ufstnz_br_ptr, Unzval_br_ptr,
				grid
				);
		} else {
		    dscatter_l(
			       ib, ljb,
			       nsupc, iukp, xsup,
			       //klst, temp_nbrow, Sherry
			       klst, gemm_m_pad, /*** temp_nbrow, Sherry */
			       lptr, temp_nbrow, /* row dimension of the block */
			       usub, lsub, tempv1,
			       indirect_thread, indirect2_thread,
			       Lrowind_bc_ptr,Lnzval_bc_ptr,
			       grid
			       );
		}
		
	    } /* end omp for (int ij =...) */
	    
#ifdef _OPENMP
	} /* end omp parallel region */
#endif
	
#if ( PRNTlevel>=1 )
	RemainScatterTimer += SuperLU_timer_() - tt_start;
#endif

#ifdef USE_VTUNE
	__itt_pause(); // stop VTune
	__SSC_MARK(0x222); // stop SDE tracing
#endif

    } /* end if Rnbrow>0 ... update remaining block */

}  /* end if L(:,k) and U(k,:) are not empty */
SuperLU_DIST_5.3.0/SRC/memory.c0000644013363400111340000003654513233431301014754 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
 * \brief Memory utilities
 *
 * 
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
 * Modified:
 *   September 30, 2017, add aligned malloc for Intel
 * 
*/ #include "superlu_ddefs.h" /* * Global variables */ ExpHeader *expanders; /* Array of pointers to 4 types of memory */ LU_stack_t stack; int_t no_expand; /* * Prototype */ static int_t memory_usage(const int_t, const int_t, const int_t); static void *expand(int_t *, MemType, int_t, int_t, Glu_freeable_t *); /* * Internal prototypes */ void SetupSpace (void *, int_t, LU_space_t *); void superlu_abort_and_exit_dist(char *msg) { /*fprintf(stderr, msg); fflush(stderr);*/ printf("%s", msg); exit (-1); } long int superlu_malloc_total = 0; #if ( DEBUGlevel>=1 ) /* Debug malloc/free. */ #define PAD_FACTOR 2 #define DWORD (sizeof(double)) /* Be sure it's no smaller than double. */ void *superlu_malloc_dist(size_t size) { char *buf; int iam; MPI_Comm_rank(MPI_COMM_WORLD, &iam); buf = (char *) malloc(size + DWORD); if ( !buf ) { printf("(%d) superlu_malloc fails: malloc_total %.0f MB, size %lld\n", iam, superlu_malloc_total*1e-6, size); ABORT("superlu_malloc: out of memory"); } ((size_t *) buf)[0] = size; #if 0 superlu_malloc_total += size + DWORD; #else superlu_malloc_total += size; #endif return (void *) (buf + DWORD); } void superlu_free_dist(void *addr) { char *p = ((char *) addr) - DWORD; if ( !addr ) ABORT("superlu_free: tried to free NULL pointer"); if ( !p ) ABORT("superlu_free: tried to free NULL+DWORD pointer"); { int_t n = ((size_t *) p)[0]; if ( !n ) ABORT("superlu_free: tried to free a freed pointer"); *((size_t *) p) = 0; /* Set to zero to detect duplicate free's. */ #if 0 superlu_malloc_total -= (n + DWORD); #else superlu_malloc_total -= n; #endif if ( superlu_malloc_total < 0 ) ABORT("superlu_malloc_total went negative"); /*free (addr);*/ free (p); } } #else /* The production mode. 
*/ #if defined (__INTEL_COMPILER) #include void * superlu_malloc_dist(size_t size) { void* ptr; int alignment = 1<<12; // align at 4K page if (size > 1<<19 ) { alignment=1<<21; } return (_mm_malloc(size, alignment)); } void superlu_free_dist(void * ptr) { _mm_free(ptr); } // #elif (_POSIX_C_SOURCE>=200112L) // // void * MALLOC(size_t size) {void* ptr;int alignment=1<<12;if(size>1<<19){alignment=1<<21;}posix_memalign( (void**)&(ptr), alignment, size );return(ptr);} //void FREE(void * ptr) {free(ptr);} #else // normal malloc/free void *superlu_malloc_dist(size_t size) { void *buf; buf = (void *) malloc(size); return (buf); } void superlu_free_dist(void *addr) { free (addr); } #endif #endif /* End debug malloc/free. */ static void copy_mem_int(int_t howmany, void *old, void *new) { register int_t i; int_t *iold = old; int_t *inew = new; for (i = 0; i < howmany; i++) inew[i] = iold[i]; } static void user_bcopy(char *src, char *dest, int_t bytes) { char *s_ptr, *d_ptr; s_ptr = src + bytes - 1; d_ptr = dest + bytes - 1; for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr; } int_t *intMalloc_dist(int_t n) { int_t *buf; buf = (int_t *) SUPERLU_MALLOC((size_t) SUPERLU_MAX(1,n) * sizeof(int_t)); return (buf); } int_t *intCalloc_dist(int_t n) { int_t *buf; register int_t i; buf = (int_t *) SUPERLU_MALLOC((size_t) SUPERLU_MAX(1,n) * sizeof(int_t)); if ( buf ) for (i = 0; i < n; ++i) buf[i] = 0; return (buf); } void *user_malloc_dist(int_t bytes, int_t which_end) { void *buf; if ( StackFull(bytes) ) return (NULL); if ( which_end == HEAD ) { buf = (char*) stack.array + stack.top1; stack.top1 += bytes; } else { stack.top2 -= bytes; buf = (char*) stack.array + stack.top2; } stack.used += bytes; return buf; } void user_free_dist(int_t bytes, int_t which_end) { if ( which_end == HEAD ) { stack.top1 -= bytes; } else { stack.top2 += bytes; } stack.used -= bytes; } /*! \brief * *
 * Setup the memory model to be used for factorization.
 *    lwork = 0: use system malloc;
 *    lwork > 0: use user-supplied work[] space.
 * 
*/
void SetupSpace(void *work, int_t lwork, LU_space_t *MemModel)
{
    if ( lwork > 0 ) {
        /* The caller handed in work[]: run the stack allocator on it. */
        *MemModel = USER;              /* user provided space */
        stack.array = (void *) work;
        stack.top2 = (lwork/4)*4;      /* must be word addressable */
        stack.size = stack.top2;
        stack.top1 = 0;
        stack.used = 0;
    } else if ( lwork == 0 ) {
        *MemModel = SYSTEM;            /* malloc/free */
    }
    /* A negative lwork leaves both *MemModel and the stack untouched. */
}

/************************************************************************/
/*! \brief
 *
 *
 * Allocate storage for the data structures common to symbolic factorization
 * routines. For those whose size cannot be predicted, a guess of FILL * nnz(A) is made.
 * Return value:
 *     If lwork = -1, return the estimated amount of space required, plus n;
 *     otherwise, return the amount of space actually allocated when
 *     memory allocation failure occurred.
 * 
*/ int_t symbfact_SubInit /************************************************************************/ ( fact_t fact, void *work, int_t lwork, int_t m, int_t n, int_t annz, Glu_persist_t *Glu_persist, Glu_freeable_t *Glu_freeable ) { int_t iword; int_t *xsup, *supno; int_t *lsub, *xlsub; int_t *usub, *xusub; int_t nzlmax, nzumax; int_t FILL = sp_ienv_dist(6); int iam; #if ( DEBUGlevel>=1 ) MPI_Comm_rank( MPI_COMM_WORLD, &iam ); CHECK_MALLOC(iam, "Enter symbfact_SubInit()"); #endif no_expand = 0; iword = sizeof(int_t); expanders = (ExpHeader *) SUPERLU_MALLOC( NO_MEMTYPE*sizeof(ExpHeader) ); if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); if ( fact == DOFACT || fact == SamePattern ) { /* Guess for L\U factors */ nzlmax = FILL * annz; nzumax = FILL/2.0 * annz; if ( lwork == -1 ) { return ( GluIntArray(n) * iword + TempSpace(m,1) + (nzlmax+nzumax)*iword + n ); } else { SetupSpace(work, lwork, &Glu_freeable->MemModel); } #if ( PRNTlevel>=2 ) printf(".. symbfact_SubInit(): annz %ld, nzlmax %ld, nzumax %ld\n", annz, nzlmax, nzumax); #endif /* Integer pointers for L\U factors */ if ( Glu_freeable->MemModel == SYSTEM ) { xsup = intMalloc_dist(n+1); supno = intMalloc_dist(n+1); xlsub = intMalloc_dist(n+1); xusub = intMalloc_dist(n+1); } else { xsup = (int_t *)user_malloc_dist((n+1) * iword, HEAD); supno = (int_t *)user_malloc_dist((n+1) * iword, HEAD); xlsub = (int_t *)user_malloc_dist((n+1) * iword, HEAD); xusub = (int_t *)user_malloc_dist((n+1) * iword, HEAD); } lsub = (int_t *) expand(&nzlmax, (MemType) LSUB, 0, 0, Glu_freeable); usub = (int_t *) expand(&nzumax, (MemType) USUB, 0, 0, Glu_freeable); while ( !lsub || !usub ) { if ( Glu_freeable->MemModel == SYSTEM ) { SUPERLU_FREE(lsub); SUPERLU_FREE(usub); } else { user_free_dist((nzlmax+nzumax)*iword, HEAD); } nzlmax /= 2; nzumax /= 2; if ( nzumax < annz/2 ) { printf("Not enough memory to perform factorization.\n"); return (memory_usage(nzlmax, nzumax, n) + n); } #if ( PRNTlevel>=1 ) printf("(%d).. 
symbfact_SubInit() reduce size:" "nzlmax %ld, nzumax %ld\n", iam, (long long) nzlmax, (long long) nzumax); fflush(stdout); #endif lsub = (int_t *) expand( &nzlmax, (MemType) LSUB, 0, 0, Glu_freeable ); usub = (int_t *) expand( &nzumax, (MemType) USUB, 0, 1, Glu_freeable ); } Glu_persist->xsup = xsup; Glu_persist->supno = supno; Glu_freeable->lsub = lsub; Glu_freeable->xlsub = xlsub; Glu_freeable->usub = usub; Glu_freeable->xusub = xusub; Glu_freeable->nzlmax = nzlmax; Glu_freeable->nzumax = nzumax; } else { /* fact == SamePattern_SameRowPerm */ if ( lwork == -1 ) { return ( GluIntArray(n) * iword + TempSpace(m, 1) + (nzlmax+nzumax)*iword + n ); } else if ( lwork == 0 ) { Glu_freeable->MemModel = SYSTEM; } else { Glu_freeable->MemModel = USER; stack.top2 = (lwork/4)*4; /* must be word-addressable */ stack.size = stack.top2; } expanders[USUB].mem = Glu_freeable->usub; expanders[LSUB].mem = Glu_freeable->lsub; expanders[USUB].size = nzumax; expanders[LSUB].size = nzlmax; } ++no_expand; #if ( DEBUGlevel>=1 ) /* Memory allocated but not freed: xsup, supno */ CHECK_MALLOC(iam, "Exit symbfact_SubInit()"); #endif return 0; } /* SYMBFACT_SUBINIT */ /************************************************************************/ /*! \brief * *
 * Expand the data structures for L and U during the factorization.
 * Return value:   0 - successful return
 *               > 0 - number of bytes allocated when run out of space
 * 
*/
int_t symbfact_SubXpand
/************************************************************************/
(
 int_t n,           /* total number of columns */
 int_t jcol,        /* current column */
 int_t next,        /* number of elements currently in the factors */
 MemType mem_type,  /* which type of memory to expand */
 int_t *maxlen,     /* modified - maximum length of a data structure */
 Glu_freeable_t *Glu_freeable   /* modified - global LU data structures */
 )
{
    void *new_mem;

#if ( DEBUGlevel>=1 )
    /* mem_type is an enum, not an int_t: print it with %d rather than IFMT. */
    printf("symbfact_SubXpand(): jcol " IFMT ", next " IFMT ", maxlen " IFMT
           ", MemType %d\n", jcol, next, *maxlen, (int) mem_type);
#endif

    new_mem = expand(maxlen, mem_type, next, 0, Glu_freeable);

    if ( !new_mem ) {
        /* Expansion failed: report it and return the memory_usage() figure. */
        int_t nzlmax = Glu_freeable->nzlmax;
        int_t nzumax = Glu_freeable->nzumax;
        fprintf(stderr, "Can't expand MemType %d: jcol " IFMT "\n",
                mem_type, jcol);
        return (memory_usage(nzlmax, nzumax, n) + n);
    }

    /* Publish the new storage and its length for the expanded array. */
    if ( mem_type == LSUB ) {
        Glu_freeable->lsub   = (int_t *) new_mem;
        Glu_freeable->nzlmax = *maxlen;
    } else if ( mem_type == USUB ) {
        Glu_freeable->usub   = (int_t *) new_mem;
        Glu_freeable->nzumax = *maxlen;
    } else ABORT("Tries to expand nonexisting memory type.\n");

    return 0;

} /* LUSUB_XPAND */


/************************************************************************/
/*! \brief
 *
 *
 * Deallocate storage of the data structures common to symbolic
 * factorization routines.
 * 
*/
int_t symbfact_SubFree(Glu_freeable_t *Glu_freeable)
/************************************************************************/
{
    /* Everything released by this routine, in release order. */
    void *to_release[5];
    int k;
#if ( DEBUGlevel>=1 )
    int iam;
    MPI_Comm_rank( MPI_COMM_WORLD, &iam );
    CHECK_MALLOC(iam, "Enter symbfact_SubFree()");
#endif

    to_release[0] = expanders;
    to_release[1] = Glu_freeable->lsub;
    to_release[2] = Glu_freeable->xlsub;
    to_release[3] = Glu_freeable->usub;
    to_release[4] = Glu_freeable->xusub;

    for (k = 0; k < 5; ++k) {
        SUPERLU_FREE(to_release[k]);
    }

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit symbfact_SubFree()");
#endif
    return 0;
} /* SYMBFACT_SUBFREE */


/************************************************************************/
/*! \brief
 *
 *
 * Expand the existing storage to accommodate more fill-ins.
 * 
*/
/*
 * Summary of the code below:
 *   - The target length is *prev_len on the very first round
 *     (no_expand == 0) or when keep_prev is set; otherwise it is
 *     alpha * *prev_len with alpha starting at EXPAND (1.5).
 *   - SYSTEM model: a fresh array of new_len int_t's is requested through
 *     SUPERLU_MALLOC.  On later rounds (no_expand != 0) a failed request is
 *     retried up to 10 times with alpha = Reduce(alpha) unless keep_prev is
 *     set; then len_to_copy entries are copied over and the old array is
 *     released.
 *   - USER model: the first round takes the array from the HEAD of the work
 *     stack.  Later rounds compute the extra bytes needed, give up (NULL)
 *     when StackFull() keeps reporting no room, and for type != USUB shift
 *     the stack contents that start at expanders[type+1].mem by that amount.
 *   - On exit expanders[type].size and *prev_len hold new_len, no_expand is
 *     incremented only if it was already nonzero, and expanders[type].mem
 *     is returned.
 * NOTE(review): the `type < USUB` / `type < LSUB` tests depend on the
 * numeric order of the MemType enumerators, which is not visible in this
 * file - confirm against the header before touching them.
 */
static void *expand
/************************************************************************/
(
 int_t *prev_len,   /* length used from previous call */
 MemType type,      /* which part of the memory to expand */
 int_t len_to_copy, /* size of the memory to be copied to new store */
 int_t keep_prev,   /* = 1: use prev_len;
                       = 0: compute new_len to expand */
 Glu_freeable_t *Glu_freeable  /* modified - global LU data structures */
 )
{
    float    EXPAND = 1.5;
    float    alpha;
    void     *new_mem;
    int_t    new_len, tries, lword, extra, bytes_to_copy;

    alpha = EXPAND;
    lword = sizeof(int_t);

    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
        new_len = *prev_len;
    else {
        new_len = alpha * *prev_len;
    }

    if ( Glu_freeable->MemModel == SYSTEM ) {
        new_mem = (void *) SUPERLU_MALLOC((size_t) new_len * lword);
        /*new_mem = (void *) calloc(new_len, lword); */
        if ( no_expand != 0 ) {
            tries = 0;
            if ( keep_prev ) {
                /* The length is fixed by the caller: no retry is possible. */
                if ( !new_mem ) return (NULL);
            } else {
                /* Retry with a growth factor obtained from Reduce(). */
                while ( !new_mem ) {
                    if ( ++tries > 10 ) return (NULL);
                    alpha = Reduce(alpha);
                    new_len = alpha * *prev_len;
                    new_mem = (void*) SUPERLU_MALLOC((size_t)new_len * lword);
                    /* new_mem = (void *) calloc(new_len, lword); */
                }
            }
            /* Carry the live entries over, then drop the old array. */
            copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
            SUPERLU_FREE (expanders[type].mem);
        }
        /* On the first round the result of the request is stored as is. */
        expanders[type].mem = (void *) new_mem;

    } else { /* MemModel == USER */
        if ( no_expand == 0 ) {
            new_mem = user_malloc_dist((size_t)new_len * lword, HEAD);
            expanders[type].mem = (void *) new_mem;
        } else {
            tries = 0;
            extra = (new_len - *prev_len) * lword; /* additional bytes wanted */
            if ( keep_prev ) {
                if ( StackFull(extra) ) return (NULL);
            } else {
                while ( StackFull(extra) ) {
                    if ( ++tries > 10 ) return (NULL);
                    alpha = Reduce(alpha);
                    new_len = alpha * *prev_len;
                    extra = (new_len - *prev_len) * lword;
                }
            }

            if ( type != USUB ) {
                /* Move everything from expanders[type+1].mem up to the
                   current stack top (stack.top1) by `extra` bytes. */
                new_mem = (void*)((char*)expanders[type + 1].mem + extra);
                bytes_to_copy = (char*)stack.array + stack.top1
                    - (char*)expanders[type + 1].mem;
                user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);

                /* Re-point the recorded array addresses by the same amount. */
                if ( type < USUB ) {
                    Glu_freeable->usub =
                        expanders[USUB].mem =
                        (void*)((char*)expanders[USUB].mem + extra);
                }
                if ( type < LSUB ) {
                    Glu_freeable->lsub =
                        expanders[LSUB].mem =
                        (void*)((char*)expanders[LSUB].mem + extra);
                }
                stack.top1 += extra;
                stack.used += extra;

            } /* if ... */

        } /* else ... */
    }

    expanders[type].size = new_len;
    *prev_len = new_len;
    if ( no_expand ) ++no_expand;

    return (void *) expanders[type].mem;

} /* EXPAND */


/************************************************************************/
/*! \brief
 *
 *
 * mem_usage consists of the following fields:
 *    - for_lu (float)
 *      The amount of space used in bytes for the L\U data structures.
 *    - total (float)
 *      The amount of space needed in bytes to perform factorization.
 *    - expansions (int)
 *      Number of memory expansions during the LU factorization.
 * 
*/ int_t QuerySpace_dist(int_t n, int_t lsub_size, Glu_freeable_t *Glu_freeable, superlu_dist_mem_usage_t *mem_usage) /************************************************************************/ { register int_t iword = sizeof(int_t); extern int_t no_expand; /* For the adjacency graphs of L and U. */ /*mem_usage->for_lu = (float)( (4*n + 3) * iword + Glu_freeable->xlsub[n]*iword );*/ mem_usage->for_lu = (float)( (4*n + 3) * iword + lsub_size * iword ); mem_usage->for_lu += (float)( (n + 1) * iword + Glu_freeable->xusub[n]*iword ); /* Working storage to support factorization */ mem_usage->total = mem_usage->for_lu + 9*n*iword; mem_usage->expansions = --no_expand; return 0; } /* QUERYSPACE_DIST */ static int_t memory_usage(const int_t nzlmax, const int_t nzumax, const int_t n) { register int_t iword = sizeof(int_t); return (10*n*iword + (nzlmax+nzumax)*iword); } SuperLU_DIST_5.3.0/SRC/dbinary_io.c0000644013363400111340000000244213233431301015550 0ustar xiaoyessg#include "superlu_ddefs.h" int dread_binary(FILE *fp, int_t *m, int_t *n, int_t *nnz, double **nzval, int_t **rowind, int_t **colptr) { size_t isize = sizeof(int_t), dsize = sizeof(double); int nnz_read; fread(n, isize, 1, fp); fread(nnz, isize, 1, fp); printf("fread n %d\tnnz %d\n", *n, *nnz); *m = *n; *colptr = intMalloc_dist(*n+1); *rowind = intMalloc_dist(*nnz); *nzval = doubleMalloc_dist(*nnz); fread(*colptr, isize, (size_t) (*n + 1), fp); fread(*rowind, isize, (size_t) *nnz, fp); nnz_read = fread(*nzval, dsize, (size_t) (*nnz), fp); printf("# of doubles fread: %d\n", nnz_read); fclose(fp); } int dwrite_binary(int_t n, int_t nnz, double *values, int_t *rowind, int_t *colptr) { FILE *fp1; int nnz_written; size_t isize = sizeof(int_t), dsize = sizeof(double); fp1 = fopen("/scratch/scratchdirs/xiaoye/temp.bin", "wb"); fwrite(&n, isize, 1, fp1); fwrite(&nnz, isize, 1, fp1); fwrite(colptr, isize, n+1, fp1); fwrite(rowind, isize, nnz, fp1); nnz_written = fwrite(values, dsize, nnz, fp1); printf("n %d, # of double: 
%d\n", n, nnz); printf("dump binary file ... # of double fwrite: %d\n", nnz_written); assert(nnz_written==nnz); fclose(fp1); } SuperLU_DIST_5.3.0/TEST/0000755013363400111340000000000013233431301013413 5ustar xiaoyessgSuperLU_DIST_5.3.0/TEST/pdtest.sh0000755013363400111340000000314013233431301015253 0ustar xiaoyessg#!/bin/bash # bash hint: == is for string comparisons, -eq is for numeric ones. ofile=pdtest.out # output file if [ -e $ofile ]; then rm -f $ofile fi echo "Double-precision testing output" > $ofile MATRICES=(../EXAMPLE/g20.rua) NPROWS="1 2" NPCOLS="1 3" NVAL="9 19" NRHS="1 3" FILLRATIO="2 6" # following are blocking parameters, see sp_ienv.c RELAX="4 8" SUPERSIZE="10 20" MINGEMM="10000" ## # Loop through all matrices ... # for mat in $MATRICES; do #-------------------------------------------- # Test matrix types generated in LAPACK-style #-------------------------------------------- if [ "$mat" == "LAPACK" ]; then echo '== LAPACK test matrices' >> $ofile for n in $NVAL ; do for s in $NRHS ; do echo '' >> $ofile echo 'n='$n 'nrhs='$s >> $ofile mpiexec -n 2 pdtest -r 1 -c 2 -x 4 -m 10 -b 5 -s 1 >> $ofile done done #-------------------------------------------- # Test a specified sparse matrix #-------------------------------------------- else echo '' >> $ofile echo '== sparse matrix:' $m >> $ofile for s in $NRHS; do for r in $NPROWS; do for c in $NPCOLS; do np=$(($r*$c)) for b in $FILLRATIO; do for x in $RELAX; do for m in $SUPERSIZE; do echo '' >> $ofile echo "**-- nrhs = $s, process grid = $r X $c, fill $b, relax $x, max-super $m" echo "**-- nrhs = $s, process grid = $r X $c, fill $b, relax $x, max-super $m" >> $ofile mpiexec -n $np pdtest -r $r -c $c -x $x -m $m -b $b -s 1 -f $mat >> $ofile done done done done done done fi done SuperLU_DIST_5.3.0/TEST/pztest.c0000644013363400111340000004130313233431301015111 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for testing PZGSSVX. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 30, 2017
 * 
*/ /* * File name: pztest.c * Purpose: MAIN test program */ #include #include //#include #ifdef _MSC_VER #include #else #include #endif #include #include "superlu_dist_config.h" #include "superlu_zdefs.h" #define NTESTS 1 /*5*/ /* Number of test types */ #define NTYPES 11 /* Number of matrix types */ #define NTRAN 2 #define THRESH 20.0 #define FMT1 "%10s:n=%d, test(%d)=%12.5g\n" #define FMT2 "%10s:fact=%4d, DiagScale=%d, n=%d, imat=%d, test(%d)=%12.5g, berr=%12.5g\n" #define FMT3 "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n" static void parse_command_line(int argc, char *argv[], int *nprow, int *npcol, char *matrix_type, int *n, int *relax, int *maxsuper, int *fill_ratio, int *min_gemm_gpu_offload, int *nrhs, FILE **fp); extern int pzcompute_resid(int m, int n, int nrhs, SuperMatrix *A, doublecomplex *x, int ldx, doublecomplex *b, int ldb, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct, double *resid); /*! \brief Copy matrix A into matrix B, in distributed compressed row format. */ void zCopy_CompRowLoc_Matrix_dist(SuperMatrix *A, SuperMatrix *B) { NRformat_loc *Astore; NRformat_loc *Bstore; int_t i, nnz_loc, m_loc; B->Stype = A->Stype; B->Dtype = A->Dtype; B->Mtype = A->Mtype; B->nrow = A->nrow;; B->ncol = A->ncol; Astore = (NRformat_loc *) A->Store; Bstore = (NRformat_loc *) B->Store; Bstore->nnz_loc = Astore->nnz_loc; nnz_loc = Astore->nnz_loc; Bstore->m_loc = Astore->m_loc; m_loc = Astore->m_loc; Bstore->fst_row = Astore->fst_row; memcpy(Bstore->nzval, Astore->nzval, nnz_loc * sizeof(doublecomplex)); memcpy(Bstore->colind, Astore->colind, nnz_loc * sizeof(int_t)); memcpy(Bstore->rowptr, Astore->rowptr, (m_loc+1) * sizeof(int_t)); } /*! \brief Print a summary of the testing results. 
*/ void PrintSumm(char *type, int nfail, int nrun, int nerrs) { if ( nfail > 0 ) printf("%3s driver: %d out of %d tests failed to pass the threshold\n", type, nfail, nrun); else printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun); if ( nerrs > 0 ) printf("%6d error messages recorded\n", nerrs); } int main(int argc, char *argv[]) { /* *
 * Purpose
 * =======
 *
 * PZTEST is the main test program for the DOUBLE COMPLEX linear 
 * equation driver routines PZGSSVX.
 * 
 * The program is invoked by a shell script file -- dtest.csh.
 * The output from the tests are written into a file -- dtest.out.
 */
    superlu_dist_options_t options;
    SuperLUStat_t stat;
    SuperMatrix A, Asave;
    NRformat_loc *Astore;
    ScalePermstruct_t ScalePermstruct;
    LUstruct_t LUstruct;
    SOLVEstruct_t SOLVEstruct;
    gridinfo_t grid;
    doublecomplex   *nzval_save;
    int_t    *colind_save, *rowptr_save;
    double   *berr, *R, *C;
    doublecomplex   *b, *bsave, *xtrue, *solx;
    int    i, j, m, n, izero = 0;
    int    nprow, npcol;
    int    iam, info, ldb, ldx, nrhs;
    int_t  iinfo;
    char     **cpp, c;
    FILE *fp, *fopen();
    char matrix_type[8], equed[1];
    int  relax, maxsuper=sp_ienv_dist(3), fill_ratio=sp_ienv_dist(6),
         min_gemm_gpu_offload=0;
    int    equil, ifact, nfact, iequil, iequed, prefact, notfactored;
    int    nt, nrun=0, nfail=0, nerrs=0, imat, fimat=0, nimat=1;
    fact_t fact;
    double rowcnd, colcnd, amax;
    double result[NTESTS];

    /* Fixed set of parameters */
    int     iseed[]  = {1988, 1989, 1990, 1991};
    char    equeds[]  = {'N', 'R', 'C', 'B'};
    DiagScale_t equils[] = {NOEQUIL, ROW, COL, BOTH};
    fact_t  facts[] = {FACTORED, DOFACT, SamePattern, SamePattern_SameRowPerm};
    trans_t transs[]  = {NOTRANS, TRANS, CONJ};

    nprow = 1;  /* Default process rows.      */
    npcol = 1;  /* Default process columns.   */
    nrhs = 1;   /* Number of right-hand side. */
    for (i = 0; i < NTESTS; ++i) result[i] = 0.0;

    /* Parse command line argv[]. */
    parse_command_line(argc, argv, &nprow, &npcol, matrix_type, &n,
		       &relax, &maxsuper,
		       &fill_ratio, &min_gemm_gpu_offload, &nrhs, &fp);

    /* ------------------------------------------------------------
       INITIALIZE MPI ENVIRONMENT. 
       ------------------------------------------------------------*/
    MPI_Init( &argc, &argv );

    /* ------------------------------------------------------------
       INITIALIZE THE SUPERLU PROCESS GRID. 
       ------------------------------------------------------------*/
    superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid);

    /* Bail out if I do not belong in the grid. */
    iam = grid.iam;
    if ( iam >= nprow * npcol )	goto out;
    if ( 0 ) {
        printf("\tProcess grid\t%d X %d\n", (int)grid.nprow, (int)grid.npcol);
	fflush(stdout);
    }

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter main()");
#endif

    /* Set the default input options. */
    set_default_options_dist(&options);
    options.PrintStat = NO;
	
    if (!iam) {
	print_sp_ienv_dist(&options);
	print_options_dist(&options);
	fflush(stdout);
    }

    if ( !(berr = doubleMalloc_dist(nrhs)) )
	ABORT("Malloc fails for berr[].");
	
    /* Loop through all the input options. */
    for (imat = fimat; imat < nimat; ++imat) { /* All matrix types */
	//if (!iam) printf("imat loop ... %d\n", imat);
	/* ------------------------------------------------------------
	   GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. 
	   ------------------------------------------------------------*/
	zcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid);

	m = A.nrow;
	n = A.ncol;

	if ( !(bsave = doublecomplexMalloc_dist(ldb * nrhs)) )
	    ABORT("Malloc fails for bsave[]");
	for (j = 0; j < nrhs; ++j)
	    for (i = 0; i < ldb; ++i) bsave[i+j*ldb] = b[i+j*ldb];

	/* Save a copy of matrix A in Asave. */
	Astore = (NRformat_loc *) A.Store;
	int_t nnz_loc = Astore->nnz_loc;
	int_t m_loc = Astore->m_loc;
	nzval_save = (doublecomplex *) doublecomplexMalloc_dist(nnz_loc);
	colind_save = (int_t *) intMalloc_dist(nnz_loc);
	rowptr_save = (int_t *) intMalloc_dist(m_loc + 1);
	zCreate_CompRowLoc_Matrix_dist(&Asave, m, n, nnz_loc, m_loc, Astore->fst_row,
				       nzval_save, colind_save, rowptr_save,
				       SLU_NR_loc, SLU_D, SLU_GE);
	zCopy_CompRowLoc_Matrix_dist(&A, &Asave);

	for (iequed = 0; iequed < 4; ++iequed) {
	    int what_equil = equils[iequed];
	    if (iequed == 0) nfact = 4;
	    else { /* Only test factored, pre-equilibrated matrix */
		nfact = 1;
		options.RowPerm = NOROWPERM; /* Turn off MC64 */
	    }
	    //if (!iam) printf("iequed loop ... %d\n", iequed);

	    for (ifact = 0; ifact < nfact; ++ifact) {
		fact = facts[ifact];
		options.Fact = fact;
		//if (!iam) printf("ifact loop ... %d\n", ifact);

		for (equil = 0; equil < 2; ++equil) {

		    //if (!iam) printf("equil loop ... %d\n", equil);

		    options.Equil = equil;
		    /* Need a first factor */
		    prefact   = ( options.Fact == FACTORED ||
				  options.Fact == SamePattern ||
				  options.Fact == SamePattern_SameRowPerm );

		    /* Restore the matrix A. */
		    zCopy_CompRowLoc_Matrix_dist(&Asave, &A);

		    /* Initialize ScalePermstruct and LUstruct. */
		    ScalePermstructInit(m, n, &ScalePermstruct);
		    LUstructInit(n, &LUstruct);

		    //if ( options.Fact == FACTORED || 
		    // options.Fact == SamePattern_SameRowPerm ) {

		    if ( prefact ) {

			R = (double *) SUPERLU_MALLOC(m*sizeof(double));
			C = (double *) SUPERLU_MALLOC(n*sizeof(double));
			
			/* Later call to PZGSSVX only needs to solve. */
                        if ( equil || iequed ) {
			    /* Compute row and column scale factors to
			       equilibrate matrix A.    */
			    pzgsequ(&A, R, C, &rowcnd, &colcnd, &amax, &iinfo,
				    &grid);

			    /* Force equilibration. */
			    if ( iinfo==0 && n > 0 ) {
				if ( what_equil == ROW ) {
				    rowcnd = 0.;
				    colcnd = 1.;
				    ScalePermstruct.DiagScale = ROW;
				    ScalePermstruct.R = R;
				} else if ( what_equil == COL ) {
				    rowcnd = 1.;
				    colcnd = 0.;
				    ScalePermstruct.DiagScale = COL;
				    ScalePermstruct.C = C;
				} else if ( what_equil == BOTH ) {
				    rowcnd = 0.;
				    colcnd = 0.;
				    ScalePermstruct.DiagScale = BOTH;
				    ScalePermstruct.R = R;
				    ScalePermstruct.C = C;
				}
			    }
			
			    /* Equilibrate the matrix. */
			    pzlaqgs(&A, R, C, rowcnd, colcnd, amax, equed);
			    // printf("after pdlaqgs: *equed %c\n", *equed);

			    /* Not equilibrate anymore when calling PDGSSVX,.
			     * so, no malloc/free {R,C} inside PDGSSVX. */
			    options.Equil = NO;
			} /* end if (equil || iequed) */
		    } /* end if prefact */

		    if ( prefact ) { /* Need a first factor */
			
		        /* Save Fact option. */
		        fact = options.Fact;
			options.Fact = DOFACT;

			/* Initialize the statistics variables. */
			PStatInit(&stat);
	
			int nrhs1 = 0; /* Only performs factorization */
			pzgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs1,
				&grid, &LUstruct, &SOLVEstruct,
				berr, &stat, &info);

			if ( info ) {
			    printf("** First factor: nrun %d: fact %d, info %d, "
				   "equil %d, what_equil %d, DiagScale %d \n",
				   nrun, fact, info, equil, what_equil,
				   ScalePermstruct.DiagScale);
			}

			PStatFree(&stat);

		        /* Restore Fact option. */
			options.Fact = fact;
			if ( fact == SamePattern ) {
			    // {L,U} not re-used in subsequent call to PDGSSVX.
			    Destroy_LU(n, &grid, &LUstruct);
			}

		    } /* end if .. first time factor */

		    /*----------------
		     * Test pzgssvx
		     *----------------*/

		    if ( options.Fact != FACTORED ) {
			/* Restore the matrix A. */
			zCopy_CompRowLoc_Matrix_dist(&Asave, &A);
		    } 

		    /* Set the right-hand side. */
		    zCopy_Dense_Matrix_dist(m_loc, nrhs, bsave, ldb, b, ldb);

		    PStatInit(&stat);

		    /*if ( !iam ) printf("\ttest pdgssvx: nrun %d, iequed %d, equil %d, fact %d\n", 
		      nrun, iequed, equil, options.Fact);*/
		    /* Testing PDGSSVX: solve and compute the error bounds. */
		    pzgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs,
			    &grid, &LUstruct, &SOLVEstruct,
			    berr, &stat, &info);

		    PStatFree(&stat);
#if 0
		    pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc,
				     nrhs, b, ldb, xtrue, ldx, &grid);
#endif
		    /*		    if ( info && info != izero ) {*/
		    if ( info ) {
			printf(FMT3, "pzgssvx",info,izero,n,nrhs,imat,nfail);
		    } else {
			/* Restore the matrix A. */
			zCopy_CompRowLoc_Matrix_dist(&Asave, &A);

			/* Compute residual of the computed solution.*/
			solx = b;
			pzcompute_resid(m, n, nrhs, &A, solx, ldx, bsave, ldb,
					&grid, &SOLVEstruct, &result[0]);
			
#if 0  /* how to get RCOND? */
			/* Check solution accuracy from generated exact solution. */
			dgst04(n, nrhs, solx, ldx, xact, ldx, rcond,
					  &result[2]);
			pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc,
					 nrhs, b, ldb, xtrue, ldx, &grid);
#endif

			/* Print information about the tests that did
			   not pass the threshold.    */
			int k1 = 0;
			for (i = k1; i < NTESTS; ++i) {
			    if ( result[i] >= THRESH ) {
				printf(FMT2, "pzgssvx", options.Fact, 
				       ScalePermstruct.DiagScale,
				       n, imat, i, result[i], berr[0]);
				++nfail;
			    }
			}
			nrun += NTESTS;
		    } /* end else .. info == 0 */
		   
		    /* -----------------------------------------------------
		       Deallocate storage associated with {L,U}.
		       ----------------------------------------------------- */
		    if ( prefact ) {
			SUPERLU_FREE(R);
			SUPERLU_FREE(C);
			ScalePermstruct.DiagScale = NOEQUIL; /* Avoid free R/C again. */
		    }
		    ScalePermstructFree(&ScalePermstruct);
		    Destroy_LU(n, &grid, &LUstruct);
		    LUstructFree(&LUstruct);
		    if ( options.SolveInitialized ) {
			zSolveFinalize(&options, &SOLVEstruct);
		    }

		} /* end for equil ... */
		    
	    } /* end for ifact ... */
		
	} /* end for iequed ... */
	
	/* ------------------------------------------------------------
	   DEALLOCATE STORAGE.
	   ------------------------------------------------------------*/
	Destroy_CompRowLoc_Matrix_dist(&A);
	Destroy_CompRowLoc_Matrix_dist(&Asave);
	//	ScalePermstructFree(&ScalePermstruct);
	SUPERLU_FREE(b);
	SUPERLU_FREE(bsave);
	SUPERLU_FREE(xtrue);

    } /* end for imat ... */

    /* Print a summary of the testing results. */
    if ( iam==0 ) PrintSumm("DGS", nfail, nrun, nerrs);

    SUPERLU_FREE(berr);

    /* ------------------------------------------------------------
       RELEASE THE SUPERLU PROCESS GRID.
       ------------------------------------------------------------*/
out:
    superlu_gridexit(&grid);

    /* ------------------------------------------------------------
       TERMINATES THE MPI EXECUTION ENVIRONMENT.
       ------------------------------------------------------------*/
    MPI_Finalize();

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit main()");
#endif

}

// Use putenv as exists on Windows
#ifdef _MSC_VER
#define putenv _putenv
#endif

/*
 * Export NAME=VALUE into the process environment.
 *
 * The string handed to putenv() becomes part of the environment, so it is
 * heap-allocated here and deliberately never freed.
 */
static void
set_env_option(const char *name, const char *value)
{
    size_t len = strlen(name) + strlen(value) + 2; /* '=' and '\0' */
    char *envstr = (char *) malloc(len);

    if ( !envstr ) ABORT("Malloc fails for environment string");
    snprintf(envstr, len, "%s=%s", name, value);
    putenv(envstr);
}

/*  
 * Parse command line options to get various input parameters.
 *
 *   -h        print the option summary and exit(1)
 *   -r <int>  stored in *nprow
 *   -c <int>  stored in *npcol
 *   -n <int>  stored in *n
 *   -s <int>  stored in *nrhs
 *   -f <file> opened for reading, stream stored in *fp (aborts on failure)
 *   -x, -m, -b, -g
 *             exported as environment variables NREL, NSUP, FILL and
 *             N_GEMM respectively
 *   -t <arg>  accepted but ignored
 *
 * matrix_type, relax, maxsuper, fill_ratio and min_gemm_gpu_offload are
 * not written by this routine.  Outputs whose option is absent are left
 * untouched, so the caller must initialize them (in particular *fp).
 */
static void
parse_command_line(int argc, char *argv[], int *nprow, int *npcol,
		   char *matrix_type, int *n, int *relax, int *maxsuper,
		   int *fill_ratio, int *min_gemm_gpu_offload,
		   int *nrhs, FILE **fp)
{
    int c;
    extern char *optarg;

    /* getopt() returns -1 when the options are exhausted. */
    while ( (c = getopt(argc, argv, "hr:c:t:n:x:m:b:g:s:f:")) != -1 ) {
	switch (c) {
	  case 'h':
	    printf("Options:\n");
	    printf("\t-r <int> - process rows\n");
	    printf("\t-c <int> - process columns\n");
	    printf("\t-n <int> - matrix dimension\n");
	    printf("\t-x <int> - granularity of relaxed supernodes\n");
	    printf("\t-m <int> - maximum size of supernode\n");
	    printf("\t-b <int> - estimated fill ratio to allocate storage\n");
	    printf("\t-g <int> - minimum size of GEMM to offload to GPU\n");
	    printf("\t-s <int> - number of right-hand sides\n");
	    printf("\t-f <str> - file name storing a sparse matrix\n");
	    exit(1);
	    break;
	  case 'r': *nprow = atoi(optarg);
	            break;
	  case 'c': *npcol = atoi(optarg);
	            break;
	  case 'n': *n = atoi(optarg);
	            break;
	  case 't': /* accepted for compatibility; value is not used */
	            break;
	  case 'x': set_env_option("NREL", optarg);
	            break;
	  case 'm': set_env_option("NSUP", optarg);
	            break;
	  case 'b': set_env_option("FILL", optarg);
	            break;
	  case 'g': set_env_option("N_GEMM", optarg);
	            break;
	  case 's': *nrhs = atoi(optarg); 
	            break;
          case 'f':
                    if ( !(*fp = fopen(optarg, "r")) ) {
                        ABORT("File does not exist");
                    }
                    //printf(".. test sparse matrix in file: %s\n", optarg);
                    break;
	  default:  /* unknown option: getopt() has already reported it */
	            break;
  	}
    }
}

/*
 * Print the compile-time settings this file was built with.
 *
 * Each of PRNTlevel, DEBUGlevel, PROFlevel and StaticPivot is reported
 * only when it evaluates to 1 or more at preprocessing time.  Always
 * returns 0.
 */
int cpp_defs()
{
    fputs(".. CPP definitions:\n", stdout);

#if ( PRNTlevel>=1 )
    printf("\tPRNTlevel = %d\n", PRNTlevel);
#endif

#if ( DEBUGlevel>=1 )
    printf("\tDEBUGlevel = %d\n", DEBUGlevel);
#endif

#if ( PROFlevel>=1 )
    printf("\tPROFlevel = %d\n", PROFlevel);
#endif

#if ( StaticPivot>=1 )
    printf("\tStaticPivot = %d\n", StaticPivot);
#endif

    fputs("....\n", stdout);
    return 0;
}
SuperLU_DIST_5.3.0/TEST/Makefile0000644013363400111340000000306313233431301015055 0ustar  xiaoyessg#######################################################################
#  This makefile creates the test programs for the linear equation
#  routines in SuperLU_DIST.  The test files are grouped as follows:
#
#       DLINTST -- Double precision real test routines
#       ZLINTST -- Double precision complex test routines
#
#  Test programs can be generated for either or both of the two
#  precisions.  Enter make followed by one or more of the data types
#  desired.  For example:
#       make complex16
#  Alternatively, the command
#       make
#  without any arguments creates both test programs.
#  The executable files are called
#       pdtest
#       pztest
#
#  To remove the object files after the executable files have been
#  created, enter
#       make clean
#  On some systems, you can force the source files to be recompiled by
#  entering (for example)
#       make double FRC=FRC
#
#  Creation date:	March 16, 2017
#  Modified: 	
#######################################################################

include ../make.inc
HEADER  = ../SRC

DLINTST = pdtest.o dcreate_matrix.o pdcompute_resid.o

ZLINTST = pztest.o zcreate_matrix.o pzcompute_resid.o

all: double complex16

testmat:
	(cd MATGEN; $(MAKE))

./pdtest: $(DLINTST) $(DSUPERLULIB) $(TMGLIB)
	$(LOADER) $(LOADOPTS) $(DLINTST) $(TMGLIB) $(LIBS) -lm -o $@

./pztest: $(ZLINTST) $(DSUPERLULIB) $(TMGLIB)
	$(LOADER) $(LOADOPTS) $(ZLINTST) $(TMGLIB) $(LIBS) -lm -o $@

double: ./pdtest
complex16: ./pztest

.c.o:
	$(CC) $(CFLAGS) $(CDEFS) -I$(HEADER) -c $< $(VERBOSE)

clean:	
	rm -f *.o *test *.out

SuperLU_DIST_5.3.0/TEST/README0000644013363400111340000000076013233431301014276 0ustar  xiaoyessg		SuperLU_DIST TEST
		=================

This directory contains testing programs to test various functions
provided in SuperLU_DIST. 

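1. To run the tests (pdtest for real, pztest for complex), you may type:
  $ mpiexec -n <np> pdtest -r <process rows> -c <process columns> -f ../EXAMPLE/g20.rua 
  $ mpiexec -n <np> pztest -r <process rows> -c <process columns> -f ../EXAMPLE/cg20.cua
   where <np> = <process rows> x <process columns>.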

2. bash scripts to run tests:
   - pdtest.sh / pztest.sh : invoke many runs varying several input parameters.
SuperLU_DIST_5.3.0/TEST/pztest.sh0000755013363400111340000000313713233431301015307 0ustar  xiaoyessg#!/bin/bash

# bash hint: == is for string comparisons, -eq is for numeric ones.

ofile=pztest.out			# output file
rm -f "$ofile"
echo "Double-complex testing output" > "$ofile"

MATRICES=(../EXAMPLE/cg20.cua)
NPROWS="1 2"
NPCOLS="1 3"
NVAL="9 19"
NRHS="1 3"
FILLRATIO="2 6"
# following are blocking parameters, see sp_ienv.c
RELAX="4 8"
SUPERSIZE="10 20"
MINGEMM="10000"

##
# Loop through all matrices ...
# (MATRICES is an array: expand every element, not just the first.)
#
for mat in "${MATRICES[@]}"; do

  #--------------------------------------------
  # Test matrix types generated in LAPACK-style
  # (kept for parity with the LAPACK-style test scripts; pztest itself
  #  always reads its matrix from the file given with -f)
  #--------------------------------------------
  if  [ "$mat" == "LAPACK" ]; then
      echo '== LAPACK test matrices' >> "$ofile"
      for n in $NVAL ; do
        for s in $NRHS ; do
	    echo '' >> "$ofile"
            echo 'n='$n 'nrhs='$s >> "$ofile"
	      mpiexec -n 2 pztest -r 1 -c 2 -x 4 -m 10 -b 5 -s 1 >> "$ofile"
        done
      done
  #--------------------------------------------
  # Test a specified sparse matrix
  #--------------------------------------------
  else
    echo '' >> "$ofile"
    echo '== sparse matrix:' "$mat" >> "$ofile"
    for s in $NRHS; do
      for r in $NPROWS; do
	for c in $NPCOLS; do
	  np=$((r*c))		# one MPI process per grid position
	  for b in $FILLRATIO; do
	    for x in $RELAX; do
	      for m in $SUPERSIZE; do
		echo '' >> "$ofile"
   	        echo "**-- nrhs = $s, process grid = $r X $c, fill $b, relax $x, max-super $m"
   	        echo "**-- nrhs = $s, process grid = $r X $c, fill $b, relax $x, max-super $m" >> "$ofile"
		# Pass the nrhs value announced in the banner above.
		mpiexec -n "$np" pztest -r "$r" -c "$c" -x "$x" -m "$m" -b "$b" -s "$s" -f "$mat" >> "$ofile"
	      done
	    done
	  done
	done
      done
    done
  fi
done

SuperLU_DIST_5.3.0/TEST/CMakeLists.txt0000644013363400111340000000546713233431301016167 0ustar  xiaoyessginclude_directories(${SuperLU_DIST_SOURCE_DIR}/SRC)

# Libs linked to all of the tests
set(all_link_libs superlu_dist ${BLAS_LIB})
if (NOT MSVC)
  list(APPEND all_link_libs m)
endif ()

set(MATRICES ../EXAMPLE/g20.rua)  # sample sparse matrix from a file
set(NPROWS 1 2)		  # process rows
set(NPCOLS 1 3) 	  # process columns 
set(NVAL 9 19)	  	  # generated matrix dimensions
set(NRHS 1 3)		  # number of RHS
# set(FILLRATIO 2 10)	  # estimated fill ratio
set(FILLRATIO 2)	  # estimated fill ratio
# following are blocking parameters, see sp_ienv.c
set(RELAX 8)	   	  # relaxed supernode size: 4 8
set(SUPERSIZE 20)   	  # maximum supernode size: 10 20
set(MINGEMM 10000)	  # minimum GEMM size for GPU offload

# Append the whole content of IN_FILE to the end of OUT_FILE.
function(cat IN_FILE OUT_FILE)
  file(READ ${IN_FILE} file_text)
  file(APPEND ${OUT_FILE} "${file_text}")
endfunction()

# Register one CTest case for every combination of the NPROWS, NPCOLS,
# NRHS, FILLRATIO, RELAX and SUPERSIZE lists set above.
#   target - name of the test executable
#   input  - matrix file name, taken from ${SuperLU_DIST_SOURCE_DIR}/EXAMPLE
# Each case is named <target>_<r>x<c>_<s>_<b>_<x>_<m>_SP and is launched
# through ${MPIEXEC} with r*c processes.
# call API:  add_superlu_dist_tests(pddrive big.rua)
function(add_superlu_dist_tests target input)
  set(matrix_file "${SuperLU_DIST_SOURCE_DIR}/EXAMPLE/${input}")

  foreach(r ${NPROWS})
    foreach(c ${NPCOLS})
      math(EXPR np "${r}*${c}")
      foreach(s ${NRHS})
        foreach(b ${FILLRATIO})
          foreach(x ${RELAX})
            foreach(m ${SUPERSIZE})
              add_test(${target}_${r}x${c}_${s}_${b}_${x}_${m}_SP
                       ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${np}
                       ${MPIEXEC_PREFLAGS} ${target} ${MPIEXEC_POSTFLAGS}
                       -r ${r} -c ${c} -s ${s} -b ${b} -x ${x} -m ${m}
                       -f ${matrix_file})
            endforeach()
          endforeach()
        endforeach()
      endforeach()
    endforeach()
  endforeach()

  # MPI variables:
  # ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} PROCS
  #  	${MPIEXEC_PREFLAGS} EXECUTABLE ${MPIEXEC_POSTFLAGS} ARGS)

endfunction()

if(enable_double)
  set(DTEST pdtest.c dcreate_matrix.c pdcompute_resid.c)
  add_executable(pdtest ${DTEST})
  target_link_libraries(pdtest ${all_link_libs})
  add_superlu_dist_tests(pdtest g20.rua)
endif()

#if(enable_complex16)
#  set(ZTEST pztest.c zcreate_matrix.c pzcompute_resid.c)
#endif()
SuperLU_DIST_5.3.0/TEST/zcreate_matrix.c0000644013363400111340000001566613233431301016616 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/

/*! @file 
 * \brief Read the matrix from data file
 *
 * 
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_zdefs.h" /* \brief * *
 * Purpose
 * =======
 * 
 * ZCREATE_MATRIX reads the matrix from a data file in Harwell-Boeing format,
 * and distributes it to the processes in a distributed compressed row format.
 * It also generates the distributed true solution X and the right-hand
 * side RHS.
 *
 *
 * Arguments   
 * =========      
 *
 * A     (output) SuperMatrix*
 *       Local matrix A in NR_loc format. 
 *
 * NRHS  (input) int_t
 *       Number of right-hand sides.
 *
 * RHS   (output) doublecomplex**
 *       The right-hand side matrix.
 *
 * LDB   (output) int*
 *       Leading dimension of the right-hand side matrix.
 *
 * X     (output) doublecomplex**
 *       The true solution matrix.
 *
 * LDX   (output) int*
 *       The leading dimension of the true solution matrix.
 *
 * FP    (input) FILE*
 *       The matrix file pointer.
 *
 * GRID  (input) gridinfo_t*
 *       The 2D process mesh.
 * 
*/ int zcreate_matrix(SuperMatrix *A, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, gridinfo_t *grid) { SuperMatrix GA; /* global A */ doublecomplex *b_global, *xtrue_global; /* replicated on all processes */ int_t *rowind, *colptr; /* global */ doublecomplex *nzval; /* global */ doublecomplex *nzval_loc; /* local */ int_t *colind, *rowptr; /* local */ int_t m, n, nnz; int_t m_loc, fst_row, nnz_loc; int_t m_loc_fst; /* Record m_loc of the first p-1 processors, when mod(m, p) is not zero. */ int_t row, col, i, j, relpos; int iam; char trans[1]; int_t *marker; iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter zcreate_matrix()"); #endif if ( !iam ) { double t = SuperLU_timer_(); /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &nzval, &rowind, &colptr); printf("Time to read and distribute matrix %.2f\n", SuperLU_timer_() - t); fflush(stdout); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( nzval, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); /* Allocate storage for compressed column representation. 
*/ zallocateA_dist(n, nnz, &nzval, &rowind, &colptr); MPI_Bcast( nzval, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } #if 0 nzval[0].r = 0.1; nzval[0].i = 0.0; #endif /* Compute the number of rows to be distributed to local process */ m_loc = m / (grid->nprow * grid->npcol); m_loc_fst = m_loc; /* When m / procs is not an integer */ if ((m_loc * grid->nprow * grid->npcol) != m) { /*m_loc = m_loc+1; m_loc_fst = m_loc;*/ if (iam == (grid->nprow * grid->npcol - 1)) /* last proc. gets all*/ m_loc = m - m_loc * (grid->nprow * grid->npcol - 1); } /* Create compressed column matrix for GA. */ zCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if ( !(b_global = doublecomplexMalloc_dist(m*nrhs)) ) ABORT("Malloc fails for b[]"); if ( !(xtrue_global = doublecomplexMalloc_dist(n*nrhs)) ) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; zGenXtrue_dist(n, nrhs, xtrue_global, n); zFillRHS_dist(trans, nrhs, xtrue_global, n, &GA, b_global, m); /************************************************* * Change GA to a local A with NR_loc format * *************************************************/ rowptr = (int_t *) intMalloc_dist(m_loc+1); marker = (int_t *) intCalloc_dist(n); /* Get counts of each row of GA */ for (i = 0; i < n; ++i) for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]]; /* Set up row pointers */ rowptr[0] = 0; fst_row = iam * m_loc_fst; nnz_loc = 0; for (j = 0; j < m_loc; ++j) { row = fst_row + j; rowptr[j+1] = rowptr[j] + marker[row]; marker[j] = rowptr[j]; } nnz_loc = rowptr[m_loc]; nzval_loc = (doublecomplex *) doublecomplexMalloc_dist(nnz_loc); colind = (int_t *) intMalloc_dist(nnz_loc); /* Transfer the matrix into the compressed row storage */ for (i = 0; i < n; ++i) { for (j = colptr[i]; j < colptr[i+1]; ++j) { row = rowind[j]; if ( 
(row>=fst_row) && (row=2 ) if ( !iam ) zPrint_CompCol_Matrix_dist(&GA); #endif /* Destroy GA */ Destroy_CompCol_Matrix_dist(&GA); /******************************************************/ /* Change GA to a local A with NR_loc format */ /******************************************************/ /* Set up the local A in NR_loc format */ zCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, nzval_loc, colind, rowptr, SLU_NR_loc, SLU_Z, SLU_GE); /* Get the local B */ if ( !((*rhs) = doublecomplexMalloc_dist(m_loc*nrhs)) ) ABORT("Malloc fails for rhs[]"); for (j =0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) { row = fst_row + i; (*rhs)[j*m_loc+i] = b_global[j*n+row]; } } *ldb = m_loc; /* Set the true X */ *ldx = m_loc; if ( !((*x) = doublecomplexMalloc_dist(*ldx * nrhs)) ) ABORT("Malloc fails for x_loc[]"); /* Get the local part of xtrue_global */ for (j = 0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) (*x)[i + j*(*ldx)] = xtrue_global[i + fst_row + j*n]; } SUPERLU_FREE(b_global); SUPERLU_FREE(xtrue_global); SUPERLU_FREE(marker); #if ( DEBUGlevel>=1 ) printf("sizeof(NRforamt_loc) %lu\n", sizeof(NRformat_loc)); CHECK_MALLOC(iam, "Exit zcreate_matrix()"); #endif return 0; } SuperLU_DIST_5.3.0/TEST/dcreate_matrix.c0000644013363400111340000001544613233431301016564 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read the matrix from data file * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_ddefs.h" /* \brief * *
 * Purpose
 * =======
 * 
 * DCREATE_MATRIX read the matrix from data file in Harwell-Boeing format,
 * and distribute it to processors in a distributed compressed row format.
 * It also generates the distributed true solution X and the right-hand
 * side RHS.
 *
 *
 * Arguments   
 * =========      
 *
 * A     (output) SuperMatrix*
 *       Local matrix A in NR_loc format. 
 *
 * NRHS  (input) int
 *       Number of right-hand sides.
 *
 * RHS   (output) double**
 *       The right-hand side matrix.
 *
 * LDB   (output) int*
 *       Leading dimension of the right-hand side matrix.
 *
 * X     (output) double**
 *       The true solution matrix.
 *
 * LDX   (output) int*
 *       The leading dimension of the true solution matrix.
 *
 * FP    (input) FILE*
 *       The matrix file pointer.
 *
 * GRID  (input) gridinfo_t*
 *       The 2D process mesh.
 * 
*/ int dcreate_matrix(SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, gridinfo_t *grid) { SuperMatrix GA; /* global A */ double *b_global, *xtrue_global; /* replicated on all processes */ int_t *rowind, *colptr; /* global */ double *nzval; /* global */ double *nzval_loc; /* local */ int_t *colind, *rowptr; /* local */ int_t m, n, nnz; int_t m_loc, fst_row, nnz_loc; int_t m_loc_fst; /* Record m_loc of the first p-1 processors, when mod(m, p) is not zero. */ int_t row, col, i, j, relpos; int iam; char trans[1]; int_t *marker; iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dcreate_matrix()"); #endif if ( !iam ) { double t = SuperLU_timer_(); /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &nzval, &rowind, &colptr); printf("Time to read and distribute matrix %.2f\n", SuperLU_timer_() - t); fflush(stdout); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( nzval, nnz, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); /* Allocate storage for compressed column representation. 
*/ dallocateA_dist(n, nnz, &nzval, &rowind, &colptr); MPI_Bcast( nzval, nnz, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } #if 0 nzval[0]=0.1; #endif /* Compute the number of rows to be distributed to local process */ m_loc = m / (grid->nprow * grid->npcol); m_loc_fst = m_loc; /* When m / procs is not an integer */ if ((m_loc * grid->nprow * grid->npcol) != m) { /*m_loc = m_loc+1; m_loc_fst = m_loc;*/ if (iam == (grid->nprow * grid->npcol - 1)) /* last proc. gets all*/ m_loc = m - m_loc * (grid->nprow * grid->npcol - 1); } /* Create compressed column matrix for GA. */ dCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if ( !(b_global = doubleMalloc_dist(m*nrhs)) ) ABORT("Malloc fails for b[]"); if ( !(xtrue_global = doubleMalloc_dist(n*nrhs)) ) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; dGenXtrue_dist(n, nrhs, xtrue_global, n); dFillRHS_dist(trans, nrhs, xtrue_global, n, &GA, b_global, m); /************************************************* * Change GA to a local A with NR_loc format * *************************************************/ rowptr = (int_t *) intMalloc_dist(m_loc+1); marker = (int_t *) intCalloc_dist(n); /* Get counts of each row of GA */ for (i = 0; i < n; ++i) for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]]; /* Set up row pointers */ rowptr[0] = 0; fst_row = iam * m_loc_fst; nnz_loc = 0; for (j = 0; j < m_loc; ++j) { row = fst_row + j; rowptr[j+1] = rowptr[j] + marker[row]; marker[j] = rowptr[j]; } nnz_loc = rowptr[m_loc]; nzval_loc = (double *) doubleMalloc_dist(nnz_loc); colind = (int_t *) intMalloc_dist(nnz_loc); /* Transfer the matrix into the compressed row storage */ for (i = 0; i < n; ++i) { for (j = colptr[i]; j < colptr[i+1]; ++j) { row = rowind[j]; if ( (row>=fst_row) && (row=2 ) if ( !iam ) dPrint_CompCol_Matrix_dist(&GA); #endif 
/* Destroy GA */ Destroy_CompCol_Matrix_dist(&GA); /******************************************************/ /* Change GA to a local A with NR_loc format */ /******************************************************/ /* Set up the local A in NR_loc format */ dCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, nzval_loc, colind, rowptr, SLU_NR_loc, SLU_D, SLU_GE); /* Get the local B */ if ( !((*rhs) = doubleMalloc_dist(m_loc*nrhs)) ) ABORT("Malloc fails for rhs[]"); for (j =0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) { row = fst_row + i; (*rhs)[j*m_loc+i] = b_global[j*n+row]; } } *ldb = m_loc; /* Set the true X */ *ldx = m_loc; if ( !((*x) = doubleMalloc_dist(*ldx * nrhs)) ) ABORT("Malloc fails for x_loc[]"); /* Get the local part of xtrue_global */ for (j = 0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) (*x)[i + j*(*ldx)] = xtrue_global[i + fst_row + j*n]; } SUPERLU_FREE(b_global); SUPERLU_FREE(xtrue_global); SUPERLU_FREE(marker); #if ( DEBUGlevel>=1 ) printf("sizeof(NRforamt_loc) %lu\n", sizeof(NRformat_loc)); CHECK_MALLOC(iam, "Exit dcreate_matrix()"); #endif return 0; } SuperLU_DIST_5.3.0/TEST/runtest.cmake0000644013363400111340000000066413233431301016127 0ustar xiaoyessg# execute the test command that was added earlier. execute_process( COMMAND ${TEST} OUTPUT_FILE ${OUTPUT} RESULT_VARIABLE RET ) file(APPEND ${ALL_OUTPUT} ${HEADING}) file(APPEND ${ALL_OUTPUT} "\n") #file(READ ${OUTPUT} SINGLE_OUTPUT) file(APPEND ${ALL_OUTPUT} OUTPUT_FILE) #file(REMOVE ${OUTPUT}) # remove the individual output file. if (NOT "${RET}" STREQUAL "0") message (FATAL_ERROR "TEST FAILED!") endif() SuperLU_DIST_5.3.0/TEST/pdtest.c0000644013363400111340000004120413233431301015063 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for testing PDGSSVX. * *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 30, 2017
 * 
*/ /* * File name: pdtest.c * Purpose: MAIN test program */ #include #include //#include #ifdef _MSC_VER #include #else #include #endif #include #include "superlu_dist_config.h" #include "superlu_ddefs.h" #define NTESTS 1 /*5*/ /* Number of test types */ #define NTYPES 11 /* Number of matrix types */ #define NTRAN 2 #define THRESH 20.0 #define FMT1 "%10s:n=%d, test(%d)=%12.5g\n" #define FMT2 "%10s:fact=%4d, DiagScale=%d, n=%d, imat=%d, test(%d)=%12.5g, berr=%12.5g\n" #define FMT3 "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n" static void parse_command_line(int argc, char *argv[], int *nprow, int *npcol, char *matrix_type, int *n, int *relax, int *maxsuper, int *fill_ratio, int *min_gemm_gpu_offload, int *nrhs, FILE **fp); extern int pdcompute_resid(int m, int n, int nrhs, SuperMatrix *A, double *x, int ldx, double *b, int ldb, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct, double *resid); /*! \brief Copy matrix A into matrix B, in distributed compressed row format. */ void dCopy_CompRowLoc_Matrix_dist(SuperMatrix *A, SuperMatrix *B) { NRformat_loc *Astore; NRformat_loc *Bstore; int_t i, nnz_loc, m_loc; B->Stype = A->Stype; B->Dtype = A->Dtype; B->Mtype = A->Mtype; B->nrow = A->nrow;; B->ncol = A->ncol; Astore = (NRformat_loc *) A->Store; Bstore = (NRformat_loc *) B->Store; Bstore->nnz_loc = Astore->nnz_loc; nnz_loc = Astore->nnz_loc; Bstore->m_loc = Astore->m_loc; m_loc = Astore->m_loc; Bstore->fst_row = Astore->fst_row; memcpy(Bstore->nzval, Astore->nzval, nnz_loc * sizeof(double)); memcpy(Bstore->colind, Astore->colind, nnz_loc * sizeof(int_t)); memcpy(Bstore->rowptr, Astore->rowptr, (m_loc+1) * sizeof(int_t)); } /*! \brief Print a summary of the testing results. 
*/ void PrintSumm(char *type, int nfail, int nrun, int nerrs) { if ( nfail > 0 ) printf("%3s driver: %d out of %d tests failed to pass the threshold\n", type, nfail, nrun); else printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun); if ( nerrs > 0 ) printf("%6d error messages recorded\n", nerrs); } int main(int argc, char *argv[]) { /* *
 * Purpose
 * =======
 *
 * PDTEST is the main test program for the DOUBLE linear 
 * equation driver routines PDGSSVX.
 * 
 * The program is invoked by a shell script file -- dtest.csh.
 * The output from the tests are written into a file -- dtest.out.
 */
    superlu_dist_options_t options;
    SuperLUStat_t stat;
    SuperMatrix A, Asave;
    NRformat_loc *Astore;
    ScalePermstruct_t ScalePermstruct;
    LUstruct_t LUstruct;
    SOLVEstruct_t SOLVEstruct;
    gridinfo_t grid;
    double   *nzval_save;
    int_t    *colind_save, *rowptr_save;
    double   *berr, *R, *C;
    double   *b, *bsave, *xtrue, *solx;
    int    i, j, m, n, izero = 0;
    int    nprow, npcol;
    int    iam, info, ldb, ldx, nrhs;
    int_t  iinfo;
    char     **cpp, c;
    FILE *fp, *fopen();
    char matrix_type[8], equed[1];
    int  relax, maxsuper=sp_ienv_dist(3), fill_ratio=sp_ienv_dist(6),
         min_gemm_gpu_offload=0;
    int    equil, ifact, nfact, iequil, iequed, prefact, notfactored;
    int    nt, nrun=0, nfail=0, nerrs=0, imat, fimat=0, nimat=1;
    fact_t fact;
    double rowcnd, colcnd, amax;
    double result[NTESTS];

    /* Fixed set of parameters */
    int     iseed[]  = {1988, 1989, 1990, 1991};
    char    equeds[]  = {'N', 'R', 'C', 'B'};
    DiagScale_t equils[] = {NOEQUIL, ROW, COL, BOTH};
    fact_t  facts[] = {FACTORED, DOFACT, SamePattern, SamePattern_SameRowPerm};
    trans_t transs[]  = {NOTRANS, TRANS, CONJ};

    nprow = 1;  /* Default process rows.      */
    npcol = 1;  /* Default process columns.   */
    nrhs = 1;   /* Number of right-hand side. */
    for (i = 0; i < NTESTS; ++i) result[i] = 0.0;

    /* Parse command line argv[]. */
    parse_command_line(argc, argv, &nprow, &npcol, matrix_type, &n,
		       &relax, &maxsuper,
		       &fill_ratio, &min_gemm_gpu_offload, &nrhs, &fp);

    /* ------------------------------------------------------------
       INITIALIZE MPI ENVIRONMENT. 
       ------------------------------------------------------------*/
    MPI_Init( &argc, &argv );

    /* ------------------------------------------------------------
       INITIALIZE THE SUPERLU PROCESS GRID. 
       ------------------------------------------------------------*/
    superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid);

    /* Bail out if I do not belong in the grid. */
    iam = grid.iam;
    if ( iam >= nprow * npcol )	goto out;
    if ( 0 ) {
        printf("\tProcess grid\t%d X %d\n", (int)grid.nprow, (int)grid.npcol);
	fflush(stdout);
    }

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter main()");
#endif

    /* Set the default input options. */
    set_default_options_dist(&options);
    options.PrintStat = NO;
	
    if (!iam) {
	print_sp_ienv_dist(&options);
	print_options_dist(&options);
	fflush(stdout);
    }

    if ( !(berr = doubleMalloc_dist(nrhs)) )
	ABORT("Malloc fails for berr[].");
	
    /* Loop through all the input options. */
    for (imat = fimat; imat < nimat; ++imat) { /* All matrix types */
	//if (!iam) printf("imat loop ... %d\n", imat);
	/* ------------------------------------------------------------
	   GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. 
	   ------------------------------------------------------------*/
	dcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid);

	m = A.nrow;
	n = A.ncol;

	if ( !(bsave = doubleMalloc_dist(ldb * nrhs)) )
	    ABORT("Malloc fails for bsave[]");
	for (j = 0; j < nrhs; ++j)
	    for (i = 0; i < ldb; ++i) bsave[i+j*ldb] = b[i+j*ldb];

	/* Save a copy of matrix A in Asave. */
	Astore = (NRformat_loc *) A.Store;
	int_t nnz_loc = Astore->nnz_loc;
	int_t m_loc = Astore->m_loc;
	nzval_save = (double *) doubleMalloc_dist(nnz_loc);
	colind_save = (int_t *) intMalloc_dist(nnz_loc);
	rowptr_save = (int_t *) intMalloc_dist(m_loc + 1);
	dCreate_CompRowLoc_Matrix_dist(&Asave, m, n, nnz_loc, m_loc, Astore->fst_row,
				       nzval_save, colind_save, rowptr_save,
				       SLU_NR_loc, SLU_D, SLU_GE);
	dCopy_CompRowLoc_Matrix_dist(&A, &Asave);

	for (iequed = 0; iequed < 4; ++iequed) {
	    int what_equil = equils[iequed];
	    if (iequed == 0) nfact = 4;
	    else { /* Only test factored, pre-equilibrated matrix */
		nfact = 1;
		options.RowPerm = NOROWPERM; /* Turn off MC64 */
	    }
	    //if (!iam) printf("iequed loop ... %d\n", iequed);

	    for (ifact = 0; ifact < nfact; ++ifact) {
		fact = facts[ifact];
		options.Fact = fact;
		//if (!iam) printf("ifact loop ... %d\n", ifact);

		for (equil = 0; equil < 2; ++equil) {

		    //if (!iam) printf("equil loop ... %d\n", equil);

		    options.Equil = equil;
		    /* Need a first factor */
		    prefact   = ( options.Fact == FACTORED ||
				  options.Fact == SamePattern ||
				  options.Fact == SamePattern_SameRowPerm );

		    /* Restore the matrix A. */
		    dCopy_CompRowLoc_Matrix_dist(&Asave, &A);

		    /* Initialize ScalePermstruct and LUstruct. */
		    ScalePermstructInit(m, n, &ScalePermstruct);
		    LUstructInit(n, &LUstruct);

		    //if ( options.Fact == FACTORED || 
		    // options.Fact == SamePattern_SameRowPerm ) {

		    if ( prefact ) {

			R = (double *) SUPERLU_MALLOC(m*sizeof(double));
			C = (double *) SUPERLU_MALLOC(n*sizeof(double));
			
			/* Later call to PDGSSVX only needs to solve. */
                        if ( equil || iequed ) {
			    /* Compute row and column scale factors to
			       equilibrate matrix A.    */
			    pdgsequ(&A, R, C, &rowcnd, &colcnd, &amax, &iinfo,
				    &grid);

			    /* Force equilibration. */
			    if ( iinfo==0 && n > 0 ) {
				if ( what_equil == ROW ) {
				    rowcnd = 0.;
				    colcnd = 1.;
				    ScalePermstruct.DiagScale = ROW;
				    ScalePermstruct.R = R;
				} else if ( what_equil == COL ) {
				    rowcnd = 1.;
				    colcnd = 0.;
				    ScalePermstruct.DiagScale = COL;
				    ScalePermstruct.C = C;
				} else if ( what_equil == BOTH ) {
				    rowcnd = 0.;
				    colcnd = 0.;
				    ScalePermstruct.DiagScale = BOTH;
				    ScalePermstruct.R = R;
				    ScalePermstruct.C = C;
				}
			    }
			
			    /* Equilibrate the matrix. */
			    pdlaqgs(&A, R, C, rowcnd, colcnd, amax, equed);
			    // printf("after pdlaqgs: *equed %c\n", *equed);

			    /* Not equilibrate anymore when calling PDGSSVX,.
			     * so, no malloc/free {R,C} inside PDGSSVX. */
			    options.Equil = NO;
			} /* end if (equil || iequed) */
		    } /* end if prefact */

		    if ( prefact ) { /* Need a first factor */
			
		        /* Save Fact option. */
		        fact = options.Fact;
			options.Fact = DOFACT;

			/* Initialize the statistics variables. */
			PStatInit(&stat);
	
			int nrhs1 = 0; /* Only performs factorization */
			pdgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs1,
				&grid, &LUstruct, &SOLVEstruct,
				berr, &stat, &info);

			if ( info ) {
			    printf("** First factor: nrun %d: fact %d, info %d, "
				   "equil %d, what_equil %d, DiagScale %d \n",
				   nrun, fact, info, equil, what_equil,
				   ScalePermstruct.DiagScale);
			}

			PStatFree(&stat);

		        /* Restore Fact option. */
			options.Fact = fact;
			if ( fact == SamePattern ) {
			    // {L,U} not re-used in subsequent call to PDGSSVX.
			    Destroy_LU(n, &grid, &LUstruct);
			}

		    } /* end if .. first time factor */

		    /*----------------
		     * Test pdgssvx
		     *----------------*/

		    if ( options.Fact != FACTORED ) {
			/* Restore the matrix A. */
			dCopy_CompRowLoc_Matrix_dist(&Asave, &A);
		    } 

		    /* Set the right-hand side. */
		    dCopy_Dense_Matrix_dist(m_loc, nrhs, bsave, ldb, b, ldb);

		    PStatInit(&stat);

		    /*if ( !iam ) printf("\ttest pdgssvx: nrun %d, iequed %d, equil %d, fact %d\n", 
		      nrun, iequed, equil, options.Fact);*/
		    /* Testing PDGSSVX: solve and compute the error bounds. */
		    pdgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs,
			    &grid, &LUstruct, &SOLVEstruct,
			    berr, &stat, &info);

		    PStatFree(&stat);
#if 0
		    pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc,
				     nrhs, b, ldb, xtrue, ldx, &grid);
#endif
		    /*		    if ( info && info != izero ) {*/
		    if ( info ) {
			printf(FMT3, "pdgssvx",info,izero,n,nrhs,imat,nfail);
		    } else {
			/* Restore the matrix A. */
			dCopy_CompRowLoc_Matrix_dist(&Asave, &A);

			/* Compute residual of the computed solution.*/
			solx = b;
			pdcompute_resid(m, n, nrhs, &A, solx, ldx, bsave, ldb,
					&grid, &SOLVEstruct, &result[0]);
			
#if 0  /* how to get RCOND? */
			/* Check solution accuracy from generated exact solution. */
			dgst04(n, nrhs, solx, ldx, xact, ldx, rcond,
					  &result[2]);
			pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc,
					 nrhs, b, ldb, xtrue, ldx, &grid);
#endif

			/* Print information about the tests that did
			   not pass the threshold.    */
			int k1 = 0;
			for (i = k1; i < NTESTS; ++i) {
			    if ( result[i] >= THRESH ) {
				printf(FMT2, "pdgssvx", options.Fact, 
				       ScalePermstruct.DiagScale,
				       n, imat, i, result[i], berr[0]);
				++nfail;
			    }
			}
			nrun += NTESTS;
		    } /* end else .. info == 0 */
		   
		    /* -----------------------------------------------------
		       Deallocate storage associated with {L,U}.
		       ----------------------------------------------------- */
		    if ( prefact ) {
			SUPERLU_FREE(R);
			SUPERLU_FREE(C);
			ScalePermstruct.DiagScale = NOEQUIL; /* Avoid free R/C again. */
		    }
		    ScalePermstructFree(&ScalePermstruct);
		    Destroy_LU(n, &grid, &LUstruct);
		    LUstructFree(&LUstruct);
		    if ( options.SolveInitialized ) {
			dSolveFinalize(&options, &SOLVEstruct);
		    }

		} /* end for equil ... */
		    
	    } /* end for ifact ... */
		
	} /* end for iequed ... */
	
	/* ------------------------------------------------------------
	   DEALLOCATE STORAGE.
	   ------------------------------------------------------------*/
	Destroy_CompRowLoc_Matrix_dist(&A);
	Destroy_CompRowLoc_Matrix_dist(&Asave);
	//	ScalePermstructFree(&ScalePermstruct);
	SUPERLU_FREE(b);
	SUPERLU_FREE(bsave);
	SUPERLU_FREE(xtrue);

    } /* end for imat ... */

    /* Print a summary of the testing results. */
    if ( iam==0 ) PrintSumm("DGS", nfail, nrun, nerrs);

    SUPERLU_FREE(berr);

    /* ------------------------------------------------------------
       RELEASE THE SUPERLU PROCESS GRID.
       ------------------------------------------------------------*/
out:
    superlu_gridexit(&grid);

    /* ------------------------------------------------------------
       TERMINATES THE MPI EXECUTION ENVIRONMENT.
       ------------------------------------------------------------*/
    MPI_Finalize();

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit main()");
#endif

}

// Use putenv as exists on Windows
#ifdef _MSC_VER
#define putenv _putenv
#endif

/*
 * Export "NAME=VALUE" into the process environment with putenv().
 * The string is heap-allocated and intentionally never freed: putenv()
 * keeps the pointer that is handed to it.
 */
static void
export_env_option(const char *name, const char *value)
{
    size_t len = strlen(name) + strlen(value) + 2;  /* '=' and '\0' */
    char *entry = (char *) malloc(len * sizeof(char));

    if ( !entry ) ABORT("Malloc fails for environment string");
    strcpy(entry, name);
    strcat(entry, "=");
    strcat(entry, value);
    putenv(entry);
}

/*
 * Parse command line options to get various input parameters.
 *
 *   -h          print the option summary and exit(1)
 *   -r <int>    *nprow  = atoi(arg)
 *   -c <int>    *npcol  = atoi(arg)
 *   -n <int>    *n      = atoi(arg)
 *   -s <int>    *nrhs   = atoi(arg)
 *   -x <val>    exported as environment variable NREL=<val>
 *   -m <val>    exported as environment variable NSUP=<val>
 *   -b <val>    exported as environment variable FILL=<val>
 *   -g <val>    exported as environment variable N_GEMM=<val>
 *   -f <file>   *fp = fopen(file, "r"); ABORT if it cannot be opened
 *   -t <val>    accepted but ignored
 *
 * Outputs that belong to options not present on the command line are left
 * untouched, so the caller must preset its defaults (in particular *fp).
 * matrix_type, relax, maxsuper, fill_ratio and min_gemm_gpu_offload are
 * never written here: the tuning options travel through the environment.
 */
static void
parse_command_line(int argc, char *argv[], int *nprow, int *npcol,
		   char *matrix_type, int *n, int *relax, int *maxsuper,
		   int *fill_ratio, int *min_gemm_gpu_offload,
		   int *nrhs, FILE **fp)
{
    int c;
    extern char *optarg;

    /* getopt() signals the end of the options with -1. */
    while ( (c = getopt(argc, argv, "hr:c:t:n:x:m:b:g:s:f:")) != -1 ) {
	switch (c) {
	  case 'h':
	    printf("Options:\n");
	    printf("\t-r <int> - process rows\n");
	    printf("\t-c <int> - process columns\n");
	    printf("\t-n <int> - matrix dimension\n");
	    printf("\t-x <int> - granularity of relaxed supernodes\n");
	    printf("\t-m <int> - maximum size of supernode\n");
	    printf("\t-b <int> - estimated fill ratio to allocate storage\n");
	    printf("\t-g <int> - minimum size of GEMM to offload to GPU\n");
	    printf("\t-s <int> - number of right-hand sides\n");
	    printf("\t-f <str> - file name storing a sparse matrix\n");
	    exit(1);
	    break;
	  case 'r': *nprow = atoi(optarg);
	            break;
	  case 'c': *npcol = atoi(optarg);
	            break;
	  case 'n': *n = atoi(optarg);
	            break;
	  case 'x': export_env_option("NREL", optarg);
	            break;
	  case 'm': export_env_option("NSUP", optarg);
	            break;
	  case 'b': export_env_option("FILL", optarg);
	            break;
	  case 'g': export_env_option("N_GEMM", optarg);
	            break;
	  case 's': *nrhs = atoi(optarg);
	            break;
	  case 'f':
	            if ( !(*fp = fopen(optarg, "r")) ) {
	                ABORT("File does not exist");
	            }
	            //printf(".. test sparse matrix in file: %s\n", optarg);
	            break;
	  case 't': /* Accepted for compatibility; matrix_type is not set. */
	            break;
	  default:  /* Unknown option: getopt() has already reported it. */
	            break;
	}
    }
}

/*
 * Print the compile-time settings this file was built with.
 * Only the macros whose value is at least 1 are listed, between a header
 * line and a closing "....".  Always returns 0.
 */
int cpp_defs()
{
    fputs(".. CPP definitions:\n", stdout);

#if ( PRNTlevel>=1 )
    printf("\tPRNTlevel = %d\n", PRNTlevel);
#endif

#if ( DEBUGlevel>=1 )
    printf("\tDEBUGlevel = %d\n", DEBUGlevel);
#endif

#if ( PROFlevel>=1 )
    printf("\tPROFlevel = %d\n", PROFlevel);
#endif

#if ( StaticPivot>=1 )
    printf("\tStaticPivot = %d\n", StaticPivot);
#endif

    fputs("....\n", stdout);

    return 0;
}
SuperLU_DIST_5.3.0/TEST/pzcompute_resid.c0000644013363400111340000001101013233431301016764 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/

/*! @file
 * \brief Test for small residual.
 *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 30, 2017
 *
 */
#include "superlu_zdefs.h"

int pzcompute_resid(int m, int n, int nrhs, SuperMatrix *A,
		    doublecomplex *x, int ldx, doublecomplex *b, int ldb,
		    gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct, double *resid)
{
/*  
    Purpose   
    =======   

    PZCOMPUTE_RESID computes the residual for a solution of a system of
    linear equations  A*x = b:
       RESID = norm(B - A*X) / ( norm(A) * norm(X) * EPS ),   
    where EPS is the machine epsilon returned by dmach_dist("Epsilon").

    norm(A) is the value returned by pzlangs("1", ...).  norm(B - A*X) and
    norm(X) are obtained per right-hand side by applying BLAS dzasum_ to
    the local rows and summing the results over all processes of the grid.

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of columns of B, the matrix of right hand sides.   
            NRHS >= 0.
	    
    A       (input/output) SuperMatrix*
            The original M x N sparse matrix A, stored in NR_loc format.
	    On exit, the column indices are modified due to SPMV setup.

    X       (input) DOUBLE COMPLEX PRECISION array, dimension (LDX,NRHS)   
            The local part of the computed solution vectors; the first
            m_loc entries of each column are used.

    LDX     (input) INTEGER   
            The leading dimension of the local array X.

    B       (input) DOUBLE COMPLEX PRECISION array, dimension (LDB,NRHS)   
            The local part of the right hand side vectors; the first
            m_loc entries of each column are used.  B is not modified:
            the difference B - A*X is formed in a work array.

    LDB     (input) INTEGER   
            The leading dimension of the local array B.

    GRID    (input) gridinfo_t*
            The process grid; its communicator is used for the reductions.

    SOLVEstruct (input) SOLVEstruct_t*
            Supplies row_to_proc for the matrix-vector product setup.
	    
    RESID   (output) double PRECISION   
            The maximum over the number of right-hand sides of
            norm(B - A*X) / ( norm(A) * norm(X) * EPS ).   
            Set to 0 when M <= 0, N <= 0 or NRHS == 0, and to 1/EPS when
            norm(A) <= 0 or the norm of a solution column is <= 0.

    Return value: always 0.

    =====================================================================
*/

    /* Table of constant values */
    int    inc  = 1;
    
    /* Local variables */
    int i, j;
    double anorm, rnorm, rnorm_g;
    double xnorm, xnorm_g;
    double eps;
    doublecomplex *ax, *R;
    pzgsmv_comm_t gsmv_comm; 
    int m_loc = ((NRformat_loc*) A->Store)->m_loc;

    /* Function prototypes */
    extern double dzasum_(int *, doublecomplex *, int *);
    
    /* Function Body */
    if ( m <= 0 || n <= 0 || nrhs == 0) {
	*resid = 0.;
	return 0;
    }

    /* Exit with RESID = 1/EPS if ANORM = 0. */
    eps = dmach_dist("Epsilon");
    anorm = pzlangs("1", A, grid);
    if (anorm <= 0.) {
	*resid = 1. / eps;
	return 0;
    }

    if ( !(ax = doublecomplexMalloc_dist(m_loc)) ) ABORT("Malloc fails for work[]");
    R = ax;  /* The residual overwrites A*X in place. */

    /* A is modified with colind[] permuted to [internal, external]. */
    pzgsmv_init(A, SOLVEstruct->row_to_proc, grid, &gsmv_comm);

    /* Compute the maximum over the number of right-hand sides of   
       norm(B - A*X) / ( norm(A) * norm(X) * EPS ) . */
    *resid = 0.;
    for (j = 0; j < nrhs; ++j) {
	doublecomplex *B_col = &b[j*ldb];
	doublecomplex *X_col = &x[j*ldx];

	/* Matrix-vector multiply: ax = A * X_col (local rows). */
	pzgsmv(0, A, grid, &gsmv_comm, X_col, ax);
	    
	/* Compute residual R = B - A * X, stored in R[] (aliases ax[]). */
	for (i = 0; i < m_loc; ++i) z_sub(&R[i], &B_col[i], &ax[i]);

	rnorm = dzasum_(&m_loc, R, &inc);
	xnorm = dzasum_(&m_loc, X_col, &inc);

	/* Combine the local sums into the global norms. */
	MPI_Allreduce( &rnorm, &rnorm_g, 1, MPI_DOUBLE, MPI_SUM, grid->comm );
	MPI_Allreduce( &xnorm, &xnorm_g, 1, MPI_DOUBLE, MPI_SUM, grid->comm );
		
	if (xnorm_g <= 0.) {
	    *resid = 1. / eps;
	} else {
	    /* Computing MAX */
	    double d1, d2;
	    d1 = *resid;
	    d2 = rnorm_g / anorm / xnorm_g / eps;
	    *resid = SUPERLU_MAX(d1, d2);
	}
    } /* end for j ... */

    pzgsmv_finalize(&gsmv_comm);
    SUPERLU_FREE(ax);

    return 0;

} /* pzcompute_resid */
SuperLU_DIST_5.3.0/TEST/pdcompute_resid.c0000644013363400111340000001067513233431301016756 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/


/*! @file
 * \brief Test for small residual.
 *
 * -- Distributed SuperLU routine (version 5.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 30, 2017
 *
 */
#include "superlu_ddefs.h"

int pdcompute_resid(int m, int n, int nrhs, SuperMatrix *A,
		    double *x, int ldx, double *b, int ldb,
		    gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct, double *resid)
{
/*  
    Purpose   
    =======   

    PDCOMPUTE_RESID computes a scaled residual for a computed solution of
    the distributed linear system  A*x = b:
       RESID = norm(B - A*X) / ( norm(A) * norm(X) * EPS ),   
    where EPS is the machine epsilon returned by dmach_dist("Epsilon").
    norm(B - A*X) and norm(X) are sums of absolute values: dasum_ on the
    local part, then MPI_SUM over grid->comm.  norm(A) is pdlangs("1", ...).

    Only the non-transposed product is formed; this routine has no TRANS
    argument.

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  If M <= 0 the routine
            sets RESID = 0 and returns.

    N       (input) INTEGER   
            The number of columns of the matrix A.  If N <= 0 the routine
            sets RESID = 0 and returns.

    NRHS    (input) INTEGER   
            The number of columns of B, the matrix of right hand sides.   
            If NRHS == 0 the routine sets RESID = 0 and returns.
	    
    A       (input/output) SuperMatrix*
            The original M x N sparse matrix A.  Its Store is read as
            NRformat_loc to obtain the local row count m_loc.
	    On exit, the column indices are modified due to SPMV setup.

    X       (input) DOUBLE PRECISION array   
            The computed solution vectors.  Column j starts at x[j*ldx];
            the first m_loc entries of each column are read on this process.

    LDX     (input) INTEGER   
            The leading dimension (column stride) of the array X.

    B       (input) DOUBLE PRECISION array   
            The right hand side vectors.  Column j starts at b[j*ldb];
            the first m_loc entries of each column are read on this process.
            B is not modified: the difference B - A*X is stored in a
            private work array.

    LDB     (input) INTEGER   
            The leading dimension (column stride) of the array B.

    SOLVEstruct (input) SOLVEstruct_t*
            Its row_to_proc member is passed to pdgsmv_init.

    GRID    (input) gridinfo_t*
            Process grid; grid->comm is used for the global reductions.
	    
    RESID   (output) double PRECISION   
            The maximum over the number of right-hand sides of
            norm(B - A*X) / ( norm(A) * norm(X) * EPS ).   
            Set to 1/EPS when norm(A) <= 0 or when norm(X) <= 0 for a
            right-hand side.

    Return value: always 0.

    =====================================================================
*/

    /* Table of constant values */
    int    inc  = 1;
    
    /* Local variables */
    int i, j;
    double anorm, rnorm, rnorm_g;
    double xnorm, xnorm_g;
    double eps;
    double *ax, *R;
    pdgsmv_comm_t gsmv_comm; 
    int m_loc = ((NRformat_loc*) A->Store)->m_loc;

    /* Function prototypes */
    extern double dasum_(int *, double *, int *);
    
    /* Function Body */
    if ( m <= 0 || n <= 0 || nrhs == 0) {
	*resid = 0.;
	return 0;
    }

    /* Exit with RESID = 1/EPS if ANORM = 0. */
    eps = dmach_dist("Epsilon");
    anorm = pdlangs("1", A, grid);
    if (anorm <= 0.) {
	*resid = 1. / eps;
	return 0;
    }

    /* Request at least one element: a zero-size allocation may return
       NULL, which would be misreported as an allocation failure on a
       process that owns no rows. */
    if ( !(ax = doubleMalloc_dist(SUPERLU_MAX(1, m_loc))) )
	ABORT("Malloc fails for work[]");
    R = ax; /* The residual overwrites the product in place. */

    /* A is modified with colind[] permuted to [internal, external]. */
    pdgsmv_init(A, SOLVEstruct->row_to_proc, grid, &gsmv_comm);

    /* Compute the maximum over the number of right-hand sides of   
       norm(B - A*X) / ( norm(A) * norm(X) * EPS ) . */
    *resid = 0.;
    for (j = 0; j < nrhs; ++j) {
	double *B_col = &b[j*ldb];
	double *X_col = &x[j*ldx];

	/* Matrix-vector multiply: ax = A * X_col.
	   NOTE(review): the leading 0 presumably selects the plain product
	   rather than an absolute-value variant -- confirm against pdgsmv. */
	pdgsmv(0, A, grid, &gsmv_comm, X_col, ax);
	    
	/* Compute residual, stored in R[] (same storage as ax[]). */
	for (i = 0; i < m_loc; ++i) R[i] = B_col[i] - ax[i];

	/* Local sums of absolute values. */
	rnorm = dasum_(&m_loc, R, &inc);
	xnorm = dasum_(&m_loc, X_col, &inc);

	/* Combine the local sums over all processes in the grid. */
	MPI_Allreduce( &rnorm, &rnorm_g, 1, MPI_DOUBLE, MPI_SUM, grid->comm );
	MPI_Allreduce( &xnorm, &xnorm_g, 1, MPI_DOUBLE, MPI_SUM, grid->comm );
		
	if (xnorm_g <= 0.) {
	    *resid = 1. / eps;
	} else {
	    /* Computing MAX */
	    double d1, d2;
	    d1 = *resid;
	    d2 = rnorm_g / anorm / xnorm_g / eps;
	    *resid = SUPERLU_MAX(d1, d2);
	}
    } /* end for j ... */

    pdgsmv_finalize(&gsmv_comm);
    SUPERLU_FREE(ax);

    return 0;

} /* pdcompute_resid */
SuperLU_DIST_5.3.0/.travis_tests.sh0000755013363400111340000000356213233431301016011 0ustar  xiaoyessg#!/bin/sh
set -e

# Travis CI test dispatcher: runs the single MPI test selected by
# $TEST_NUMBER.  Paths are derived from $TRAVIS_BUILD_DIR.

# Escape-sequence prefixes for the status messages (expanded by printf).
export RED="\033[31;1m"
export BLUE="\033[34;1m"
printf "${BLUE} GC; Entered tests file:\n"

export DATA_FOLDER=$TRAVIS_BUILD_DIR/EXAMPLE
export EXAMPLE_FOLDER=$TRAVIS_BUILD_DIR/build/EXAMPLE
export TEST_FOLDER=$TRAVIS_BUILD_DIR/build/TEST

# run_pdtest NPROCS R C S
#   Runs pdtest on g20.rua with NPROCS MPI processes, passing R, C and S as
#   the values of -r, -c and -s; -b, -x and -m are fixed at 2, 8 and 20.
run_pdtest() {
    mpirun "-n" "$1" "$TEST_FOLDER/pdtest" "-r" "$2" "-c" "$3" "-s" "$4" \
        "-b" "2" "-x" "8" "-m" "20" "-f" "$DATA_FOLDER/g20.rua"
}

# run_driver PROGRAM MATRIX
#   Runs an example driver on 4 MPI processes with -r 2 -c 2.
run_driver() {
    mpirun "-n" "4" "$EXAMPLE_FOLDER/$1" "-r" "2" "-c" "2" "$DATA_FOLDER/$2"
}

case "${TEST_NUMBER}" in
1)  run_pdtest 1 1 1 1 ;;
2)  run_pdtest 1 1 1 3 ;;
3)  run_pdtest 3 1 3 1 ;;
4)  run_pdtest 3 1 3 3 ;;
5)  run_pdtest 2 2 1 1 ;;
6)  run_pdtest 2 2 1 3 ;;
7)  run_pdtest 6 2 3 1 ;;
8)  run_pdtest 6 2 3 3 ;;
9)  run_driver pddrive1 big.rua ;;
10) run_driver pddrive2 big.rua ;;
11) run_driver pddrive3 big.rua ;;
12) run_driver pzdrive1 cg20.cua ;;
13) run_driver pzdrive2 cg20.cua ;;
14) run_driver pzdrive3 cg20.cua ;;
*)  printf "${RED} ###GC: Unknown test\n"
    # Fail the job: an unrecognised TEST_NUMBER must not count as a pass.
    exit 1 ;;
esac
SuperLU_DIST_5.3.0/Makefile0000644013363400111340000000155613233431301014303 0ustar  xiaoyessg############################################################################
#
#  Program:         SuperLU_DIST
#
#  Module:          Makefile
#
#  Purpose:         Top-level Makefile
#
#  Creation date:   September 1, 1999  version 1.0
#
#  Modified:        
#
############################################################################

include make.inc

# None of these targets produces a file of the same name.  Declaring them
# phony keeps "make install" / "make example" working on case-insensitive
# filesystems, where they would otherwise match the INSTALL/ and EXAMPLE/
# directories and be considered up to date.
.PHONY: all lib example clean install blaslib superlulib cleanlib cleantesting

all: lib install example

lib: superlulib

# Each recipe uses "cd DIR && ..." so that a missing directory is an error
# instead of running the command in the current directory.
example:
	( cd EXAMPLE && $(MAKE) )

clean: cleanlib cleantesting

install:
	( cd INSTALL && $(MAKE) )
#	( cd INSTALL; cp lsame.c ../SRC/; \
#	  cp dlamch.c ../SRC/; cp slamch.c ../SRC/ )

blaslib:
	( cd CBLAS && $(MAKE) )

superlulib:
	( cd SRC && $(MAKE) )

cleanlib:
	( cd SRC && $(MAKE) clean )
	( cd CBLAS && $(MAKE) clean )
	( cd lib && rm -f *.a )

cleantesting:
	( cd INSTALL && $(MAKE) clean )
	( cd EXAMPLE && $(MAKE) clean )
	( cd FORTRAN && $(MAKE) clean )
	( cd TEST && $(MAKE) clean )
SuperLU_DIST_5.3.0/CBLAS/0000755013363400111340000000000013233431301013460 5ustar  xiaoyessgSuperLU_DIST_5.3.0/CBLAS/dasum.c0000644013363400111340000000336713233431301014746 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

doublereal dasum_(integer *n, doublereal *dx, integer *incx)
{
/*     Returns the sum of the absolute values of the n elements
       dx[0], dx[incx], ..., dx[(n-1)*incx].
       Returns 0 if n <= 0 or incx <= 0.

       jack dongarra, linpack, 3/11/78.   
       modified 3/93 to return if incx .le. 0.   
       modified 12/3/93, array(1) declarations changed to array(*)   

       All locals are automatic (the f2c translation declared them
       static), so concurrent calls do not share state.  Elements are
       accumulated in the same order as in the f2c translation. */

    doublereal sum = 0.;
    doublereal v;
    integer k, ix;

    if (*n <= 0 || *incx <= 0) {
	return sum;
    }

    ix = 0;
    for (k = 0; k < *n; ++k) {
	v = dx[ix];
	/* Explicit comparison, so the result does not depend on which
	   abs() is in scope. */
	sum += (v >= 0 ? v : -v);
	ix += *incx;
    }
    return sum;
} /* dasum_ */

SuperLU_DIST_5.3.0/CBLAS/daxpy.c0000644013363400111340000000326513233431301014757 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, 
	integer *incx, doublereal *dy, integer *incy)
{
/*     constant times a vector plus a vector:  dy := dy + da * dx
       over n elements, stepping through dx by incx and dy by incy.
       Returns 0 immediately if n <= 0 or da == 0.

       jack dongarra, linpack, 3/11/78.   
       modified 12/3/93, array(1) declarations changed to array(*)   

       All locals are automatic (the f2c translation declared them
       static), so concurrent calls do not share state.  Elements are
       visited in the same order as in the f2c translation. */

    integer k, ix, iy;

    if (*n <= 0) {
	return 0;
    }
    if (*da == 0.) {
	return 0;
    }

    /* With a negative increment the traversal starts at the far end, so
       that every index stays within [0, (n-1)*|inc|]. */
    ix = (*incx < 0) ? (1 - *n) * *incx : 0;
    iy = (*incy < 0) ? (1 - *n) * *incy : 0;

    for (k = 0; k < *n; ++k) {
	dy[iy] += *da * dx[ix];
	ix += *incx;
	iy += *incy;
    }
    return 0;
} /* daxpy_ */

SuperLU_DIST_5.3.0/CBLAS/dcopy.c0000644013363400111340000000323213233431301014742 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, 
	doublereal *dy, integer *incy)
{
/*     copies a vector, x, to a vector, y:  n elements, stepping through
       dx by incx and dy by incy.  Returns 0 immediately if n <= 0.

       jack dongarra, linpack, 3/11/78.   
       modified 12/3/93, array(1) declarations changed to array(*)   

       All locals are automatic (the f2c translation declared them
       static), so concurrent calls do not share state.  Elements are
       visited in the same order as in the f2c translation. */

    integer k, ix, iy;

    if (*n <= 0) {
	return 0;
    }

    /* With a negative increment the traversal starts at the far end, so
       that every index stays within [0, (n-1)*|inc|]. */
    ix = (*incx < 0) ? (1 - *n) * *incx : 0;
    iy = (*incy < 0) ? (1 - *n) * *incy : 0;

    for (k = 0; k < *n; ++k) {
	dy[iy] = dx[ix];
	ix += *incx;
	iy += *incy;
    }
    return 0;
} /* dcopy_ */

SuperLU_DIST_5.3.0/CBLAS/Makefile0000644013363400111340000000601013233431301015115 0ustar  xiaoyessginclude ../make.inc
#HEADER = ../SRC

#######################################################################
#  This is the makefile to create a library for C-BLAS.
#  The files are organized as follows:
#
#       SBLAS1 -- Single precision real BLAS routines
#       CBLAS1 -- Single precision complex BLAS routines
#       DBLAS1 -- Double precision real BLAS routines
#       ZBLAS1 -- Double precision complex BLAS routines
#
#       CB1AUX -- Real BLAS routines called by complex routines
#       ZB1AUX -- D.P. real BLAS routines called by d.p. complex
#                 routines
#
#      ALLBLAS -- Auxiliary routines for Level 2 and 3 BLAS
#
#       SBLAS2 -- Single precision real BLAS2 routines
#       CBLAS2 -- Single precision complex BLAS2 routines
#       DBLAS2 -- Double precision real BLAS2 routines
#       ZBLAS2 -- Double precision complex BLAS2 routines
#
#       SBLAS3 -- Single precision real BLAS3 routines
#       CBLAS3 -- Single precision complex BLAS3 routines
#       DBLAS3 -- Double precision real BLAS3 routines
#       ZBLAS3 -- Double precision complex BLAS3 routines
#
#  The library can be set up to include routines for any combination
#  of the four precisions.  To create or add to the library, enter make
#  followed by one or more of the precisions desired.  Some examples:
#       make single
#       make single complex
#       make single double complex complex16
#  Alternatively, the command
#       make
#  without any arguments creates a library of all four precisions.
#  The library is called
#       blas.a
#  and is created at the next higher directory level.
#
#  To remove the object files after the library is created, enter
#       make clean
#
#######################################################################

SBLAS1 = isamax.o sasum.o saxpy.o scopy.o sdot.o snrm2.o \
	 srot.o sscal.o
SBLAS2 = sgemv.o ssymv.o strsv.o sger.o ssyr2.o

DBLAS1 = idamax.o dasum.o daxpy.o dcopy.o ddot.o dnrm2.o \
	 drot.o dscal.o
DBLAS2 = dgemv.o dsymv.o dtrsv.o dger.o dsyr2.o
DBLAS3 = dgemm.o dtrsm.o

CBLAS1 = icamax.o scasum.o caxpy.o ccopy.o scnrm2.o \
	 cscal.o
CBLAS2 = cgemv.o chemv.o ctrsv.o cgerc.o cgeru.o cher2.o

ZBLAS1 = izamax.o dzasum.o zaxpy.o zcopy.o dznrm2.o \
	 zscal.o dcabs1.o z_internal.o
ZBLAS2 = zgemv.o zhemv.o ztrsv.o zgerc.o zgeru.o zher2.o
ZBLAS3 = zgemm.o ztrsm.o

ALLBLAS = input_error_dist.o

# SBLAS3 and CBLAS3 are not assigned in this file; unless the included
# make.inc sets them they expand to nothing in the rules below.

# The precision targets update the archive $(BLASLIB); none of the targets
# listed here creates a file of its own name.
.PHONY: all single double complex complex16 clean

all: single double complex complex16

single: $(SBLAS1) $(SBLAS2) $(SBLAS3) $(ALLBLAS)
	$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(SBLAS1) $(ALLBLAS) \
	$(SBLAS2) $(SBLAS3)
	$(RANLIB) $(BLASLIB)

double: $(DBLAS1) $(DBLAS2) $(DBLAS3) $(ALLBLAS)
	$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \
	$(DBLAS2) $(DBLAS3)
	$(RANLIB) $(BLASLIB)

complex: $(CBLAS1) $(CBLAS2) $(CBLAS3) $(ALLBLAS)
	$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(CBLAS1) $(ALLBLAS) \
	$(CBLAS2) $(CBLAS3)
	$(RANLIB) $(BLASLIB)

complex16: $(ZBLAS1) $(ZBLAS2) $(ZBLAS3) $(ALLBLAS)
	$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(ZBLAS1) $(ALLBLAS) \
	$(ZBLAS2) $(ZBLAS3)
	$(RANLIB) $(BLASLIB)

# Compile each C source to an object file.
.c.o:
	$(CC) $(CFLAGS) $(CDEFS) -c $< $(VERBOSE)

clean:
	rm -f *.o
SuperLU_DIST_5.3.0/CBLAS/dgemm.c0000644013363400111340000002220713233431301014720 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer *
	n, integer *k, doublereal *alpha, doublereal *a, integer *lda, 
	doublereal *b, integer *ldb, doublereal *beta, doublereal *c, integer 
	*ldc)
{
    /* Local variables.  All are automatic (the f2c translation declared
       them static), so concurrent calls do not share state. */
    integer info;
    logical nota, notb;
    doublereal temp;
    integer i, j, l;
    integer nrowa, nrowb;
    extern /* Subroutine */ int input_error_dist(char *, integer *);


/*  Purpose   
    =======   

    DGEMM  performs one of the matrix-matrix operations   

       C := alpha*op( A )*op( B ) + beta*C,   

    where  op( X ) is one of   

       op( X ) = X   or   op( X ) = X',   

    alpha and beta are scalars, and A, B and C are matrices, with op( A )
    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.

    Parameters   
    ==========   

    TRANSA - CHARACTER*1.   
             On entry, TRANSA specifies the form of op( A ) to be used in
             the matrix multiplication as follows:   

                TRANSA = 'N' or 'n',  op( A ) = A.   

                TRANSA = 'T' or 't',  op( A ) = A'.   

                TRANSA = 'C' or 'c',  op( A ) = A'.   

             Only the first character is examined.
             Unchanged on exit.   

    TRANSB - CHARACTER*1.   
             On entry, TRANSB specifies the form of op( B ) to be used in
             the matrix multiplication as follows:   

                TRANSB = 'N' or 'n',  op( B ) = B.   

                TRANSB = 'T' or 't',  op( B ) = B'.   

                TRANSB = 'C' or 'c',  op( B ) = B'.   

             Only the first character is examined.
             Unchanged on exit.   

    M      - INTEGER.   
             On entry,  M  specifies  the number  of rows  of the  matrix
             op( A )  and of the  matrix  C.  M  must  be at least  zero.
             Unchanged on exit.   

    N      - INTEGER.   
             On entry,  N  specifies the number  of columns of the matrix
             op( B ) and the number of columns of the matrix C. N must be
             at least zero.   
             Unchanged on exit.   

    K      - INTEGER.   
             On entry,  K  specifies  the number of columns of the matrix
             op( A ) and the number of rows of the matrix op( B ). K must
             be at least  zero.   
             Unchanged on exit.   

    ALPHA  - DOUBLE PRECISION.   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.   
             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
             part of the array  A  must contain the matrix  A,  otherwise
             the leading  k by m  part of the array  A  must contain  the
             matrix A.   
             Unchanged on exit.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared
             in the calling (sub) program. When  TRANSA = 'N' or 'n' then
             LDA must be at least  max( 1, m ), otherwise  LDA must be at
             least  max( 1, k ).   
             Unchanged on exit.   

    B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.   
             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
             part of the array  B  must contain the matrix  B,  otherwise
             the leading  n by k  part of the array  B  must contain  the
             matrix B.   
             Unchanged on exit.   

    LDB    - INTEGER.   
             On entry, LDB specifies the first dimension of B as declared
             in the calling (sub) program. When  TRANSB = 'N' or 'n' then
             LDB must be at least  max( 1, k ), otherwise  LDB must be at
             least  max( 1, n ).   
             Unchanged on exit.   

    BETA   - DOUBLE PRECISION.   
             On entry,  BETA  specifies the scalar  beta.  When  BETA  is
             supplied as zero then C need not be set on input.   
             Unchanged on exit.   

    C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
             Before entry, the leading  m by n  part of the array  C must
             contain the matrix  C,  except when  beta  is zero, in which
             case C need not be set on entry.   
             On exit, the array  C  is overwritten by the  m by n  matrix
             ( alpha*op( A )*op( B ) + beta*C ).   

    LDC    - INTEGER.   
             On entry, LDC specifies the first dimension of C as declared
             in  the  calling  (sub)  program.   LDC  must  be  at  least
             max( 1, m ).   
             Unchanged on exit.   

    Return value: always 0.  An invalid argument is reported by calling
    input_error_dist("DGEMM ", &info) with the 1-based argument position,
    after which the routine returns without touching C.


    Level 3 Blas routine.   

    -- Written on 8-February-1989.   
       Jack Dongarra, Argonne National Laboratory.   
       Iain Duff, AERE Harwell.   
       Jeremy Du Croz, Numerical Algorithms Group Ltd.   
       Sven Hammarling, Numerical Algorithms Group Ltd.   
*/

/* 1-based, column-major element access. */
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]
#define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)]

/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
       transposed and set  NROWA and  NROWB  as the number of rows of  A
       and of  B  respectively.  Both letter cases are accepted, as the
       parameter description above promises. */

    nota = (*transa == 'N' || *transa == 'n');
    notb = (*transb == 'N' || *transb == 'n');
    if (nota) {
	nrowa = *m;
    } else {
	nrowa = *k;
    }
    if (notb) {
	nrowb = *k;
    } else {
	nrowb = *n;
    }

/*     Test the input parameters. */

    info = 0;
    if (! nota && *transa != 'C' && *transa != 'c'
	       && *transa != 'T' && *transa != 't') {
	info = 1;
    } else if (! notb && *transb != 'C' && *transb != 'c'
		      && *transb != 'T' && *transb != 't') {
	info = 2;
    } else if (*m < 0) {
	info = 3;
    } else if (*n < 0) {
	info = 4;
    } else if (*k < 0) {
	info = 5;
    } else if (*lda < max(1,nrowa)) {
	info = 8;
    } else if (*ldb < max(1,nrowb)) {
	info = 10;
    } else if (*ldc < max(1,*m)) {
	info = 13;
    }
    if (info != 0) {
	input_error_dist("DGEMM ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0 || ((*alpha == 0. || *k == 0) && *beta == 1.)) {
	return 0;
    }

/*     And if  alpha.eq.zero. */

    if (*alpha == 0.) {
	if (*beta == 0.) {
	    for (j = 1; j <= *n; ++j) {
		for (i = 1; i <= *m; ++i) {
		    C(i,j) = 0.;
		}
	    }
	} else {
	    for (j = 1; j <= *n; ++j) {
		for (i = 1; i <= *m; ++i) {
		    C(i,j) = *beta * C(i,j);
		}
	    }
	}
	return 0;
    }

/*     Start the operations. */

    if (notb) {
	if (nota) {

/*           Form  C := alpha*A*B + beta*C. */

	    for (j = 1; j <= *n; ++j) {
		if (*beta == 0.) {
		    for (i = 1; i <= *m; ++i) {
			C(i,j) = 0.;
		    }
		} else if (*beta != 1.) {
		    for (i = 1; i <= *m; ++i) {
			C(i,j) = *beta * C(i,j);
		    }
		}
		for (l = 1; l <= *k; ++l) {
		    if (B(l,j) != 0.) {
			temp = *alpha * B(l,j);
			for (i = 1; i <= *m; ++i) {
			    C(i,j) += temp * A(i,l);
			}
		    }
		}
	    }
	} else {

/*           Form  C := alpha*A'*B + beta*C */

	    for (j = 1; j <= *n; ++j) {
		for (i = 1; i <= *m; ++i) {
		    temp = 0.;
		    for (l = 1; l <= *k; ++l) {
			temp += A(l,i) * B(l,j);
		    }
		    if (*beta == 0.) {
			C(i,j) = *alpha * temp;
		    } else {
			C(i,j) = *alpha * temp + *beta * C(i,j);
		    }
		}
	    }
	}
    } else {
	if (nota) {

/*           Form  C := alpha*A*B' + beta*C */

	    for (j = 1; j <= *n; ++j) {
		if (*beta == 0.) {
		    for (i = 1; i <= *m; ++i) {
			C(i,j) = 0.;
		    }
		} else if (*beta != 1.) {
		    for (i = 1; i <= *m; ++i) {
			C(i,j) = *beta * C(i,j);
		    }
		}
		for (l = 1; l <= *k; ++l) {
		    if (B(j,l) != 0.) {
			temp = *alpha * B(j,l);
			for (i = 1; i <= *m; ++i) {
			    C(i,j) += temp * A(i,l);
			}
		    }
		}
	    }
	} else {

/*           Form  C := alpha*A'*B' + beta*C */

	    for (j = 1; j <= *n; ++j) {
		for (i = 1; i <= *m; ++i) {
		    temp = 0.;
		    for (l = 1; l <= *k; ++l) {
			temp += A(l,i) * B(j,l);
		    }
		    if (*beta == 0.) {
			C(i,j) = *alpha * temp;
		    } else {
			C(i,j) = *alpha * temp + *beta * C(i,j);
		    }
		}
	    }
	}
    }

    return 0;

/*     End of DGEMM . */

} /* dgemm_ */

SuperLU_DIST_5.3.0/CBLAS/dgemv.c0000644013363400111340000001542413233431301014734 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal *
	alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, 
	doublereal *beta, doublereal *y, integer *incy)
{
    /* Local variables.  All are automatic (the f2c translation declared
       them static), so concurrent calls do not share state. */
    integer info;
    logical notrans;
    doublereal temp;
    integer lenx, leny, i, j;
    integer ix, iy, jx, jy, kx, ky;
    extern /* Subroutine */ int input_error_dist(char *, integer *);


/*  Purpose   
    =======   

    DGEMV  performs one of the matrix-vector operations   

       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   

    where alpha and beta are scalars, x and y are vectors and A is an   
    m by n matrix.   

    Parameters   
    ==========   

    TRANS  - CHARACTER*1.   
             On entry, TRANS specifies the operation to be performed as   
             follows:   

                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   

                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   

                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   

             Only the first character is examined.
             Unchanged on exit.   

    M      - INTEGER.   
             On entry, M specifies the number of rows of the matrix A.   
             M must be at least zero.   
             Unchanged on exit.   

    N      - INTEGER.   
             On entry, N specifies the number of columns of the matrix A.
             N must be at least zero.   
             Unchanged on exit.   

    ALPHA  - DOUBLE PRECISION.   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).   
             Before entry, the leading m by n part of the array A must   
             contain the matrix of coefficients.   
             Unchanged on exit.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared
             in the calling (sub) program. LDA must be at least   
             max( 1, m ).   
             Unchanged on exit.   

    X      - DOUBLE PRECISION array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   
             Unchanged on exit.   

    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   

    BETA   - DOUBLE PRECISION.   
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   
             Unchanged on exit.   

    Y      - DOUBLE PRECISION array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the
             updated vector y.   

    INCY   - INTEGER.   
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   
             Unchanged on exit.   

    Return value: always 0.  An invalid argument is reported by calling
    input_error_dist("DGEMV ", &info) with the 1-based argument position,
    after which the routine returns without touching Y.


    Level 2 Blas routine.   

    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   
*/

/* 1-based element access; A is column-major. */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

/*     Test the input parameters.  Both letter cases of TRANS are
       accepted, as the parameter description above promises. */

    notrans = (*trans == 'N' || *trans == 'n');

    info = 0;
    if (! notrans && *trans != 'T' && *trans != 't'
		  && *trans != 'C' && *trans != 'c') {
	info = 1;
    } else if (*m < 0) {
	info = 2;
    } else if (*n < 0) {
	info = 3;
    } else if (*lda < max(1,*m)) {
	info = 6;
    } else if (*incx == 0) {
	info = 8;
    } else if (*incy == 0) {
	info = 11;
    }
    if (info != 0) {
	input_error_dist("DGEMV ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0 || (*alpha == 0. && *beta == 1.)) {
	return 0;
    }

/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
       up the start points in  X  and  Y. */

    if (notrans) {
	lenx = *n;
	leny = *m;
    } else {
	lenx = *m;
	leny = *n;
    }
    if (*incx > 0) {
	kx = 1;
    } else {
	kx = 1 - (lenx - 1) * *incx;
    }
    if (*incy > 0) {
	ky = 1;
    } else {
	ky = 1 - (leny - 1) * *incy;
    }

/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A.   

       First form  y := beta*y. */

    if (*beta != 1.) {
	if (*incy == 1) {
	    if (*beta == 0.) {
		for (i = 1; i <= leny; ++i) {
		    Y(i) = 0.;
		}
	    } else {
		for (i = 1; i <= leny; ++i) {
		    Y(i) = *beta * Y(i);
		}
	    }
	} else {
	    iy = ky;
	    if (*beta == 0.) {
		for (i = 1; i <= leny; ++i) {
		    Y(iy) = 0.;
		    iy += *incy;
		}
	    } else {
		for (i = 1; i <= leny; ++i) {
		    Y(iy) = *beta * Y(iy);
		    iy += *incy;
		}
	    }
	}
    }
    if (*alpha == 0.) {
	return 0;
    }
    if (notrans) {

/*        Form  y := alpha*A*x + y. */

	jx = kx;
	if (*incy == 1) {
	    for (j = 1; j <= *n; ++j) {
		if (X(jx) != 0.) {
		    temp = *alpha * X(jx);
		    for (i = 1; i <= *m; ++i) {
			Y(i) += temp * A(i,j);
		    }
		}
		jx += *incx;
	    }
	} else {
	    for (j = 1; j <= *n; ++j) {
		if (X(jx) != 0.) {
		    temp = *alpha * X(jx);
		    iy = ky;
		    for (i = 1; i <= *m; ++i) {
			Y(iy) += temp * A(i,j);
			iy += *incy;
		    }
		}
		jx += *incx;
	    }
	}
    } else {

/*        Form  y := alpha*A'*x + y. */

	jy = ky;
	if (*incx == 1) {
	    for (j = 1; j <= *n; ++j) {
		temp = 0.;
		for (i = 1; i <= *m; ++i) {
		    temp += A(i,j) * X(i);
		}
		Y(jy) += *alpha * temp;
		jy += *incy;
	    }
	} else {
	    for (j = 1; j <= *n; ++j) {
		temp = 0.;
		ix = kx;
		for (i = 1; i <= *m; ++i) {
		    temp += A(i,j) * X(ix);
		    ix += *incx;
		}
		Y(jy) += *alpha * temp;
		jy += *incy;
	    }
	}
    }

    return 0;

/*     End of DGEMV . */

} /* dgemv_ */

SuperLU_DIST_5.3.0/CBLAS/cscal.c0000644013363400111340000000262113233431301014712 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer *
	incx)
{
/*     scales a vector by a constant:  cx := ca * cx  for the n complex
       elements cx[0], cx[incx], ..., cx[(n-1)*incx].
       Returns 0 immediately if n <= 0 or incx <= 0.

       jack dongarra, linpack,  3/11/78.   
       modified 3/93 to return if incx .le. 0.   
       modified 12/3/93, array(1) declarations changed to array(*)   

       All locals are automatic (the f2c translation declared them
       static), so concurrent calls do not share state. */

    integer k, ix;
    complex prod;

    if (*n <= 0 || *incx <= 0) {
	return 0;
    }

    ix = 0;
    for (k = 0; k < *n; ++k) {
	/* Form the product in a temporary: both components need the
	   element's old real and imaginary parts. */
	prod.r = ca->r * cx[ix].r - ca->i * cx[ix].i;
	prod.i = ca->r * cx[ix].i + ca->i * cx[ix].r;
	cx[ix].r = prod.r;
	cx[ix].i = prod.i;
	ix += *incx;
    }
    return 0;
} /* cscal_ */

SuperLU_DIST_5.3.0/CBLAS/ctrsv.c0000644013363400111340000003167313233431301014777 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include <string.h>
#include "f2c.h"

/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n, 
	complex *a, integer *lda, complex *x, integer *incx)
{


    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    complex q__1, q__2, q__3;

    /* Builtin functions */
    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);

    /* Local variables */
    static integer info;
    static complex temp;
    static integer i, j;
    static integer ix, jx, kx;
    extern /* Subroutine */ int input_error_dist(char *, integer *);
    static logical noconj, nounit;


/*  Purpose   
    =======   

    CTRSV  solves one of the systems of equations   

       A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,   

    where b and x are n element vectors and A is an n by n unit, or   
    non-unit, upper or lower triangular matrix.   

    No test for singularity or near-singularity is included in this   
    routine. Such tests must be performed before calling this routine.   

    Parameters   
    ==========   

    UPLO   - CHARACTER*1.   
             On entry, UPLO specifies whether the matrix is an upper or   
             lower triangular matrix as follows:   

                UPLO = 'U' or 'u'   A is an upper triangular matrix.   

                UPLO = 'L' or 'l'   A is a lower triangular matrix.   

             Unchanged on exit.   

    TRANS  - CHARACTER*1.   
             On entry, TRANS specifies the equations to be solved as   
             follows:   

                TRANS = 'N' or 'n'   A*x = b.   

                TRANS = 'T' or 't'   A'*x = b.   

                TRANS = 'C' or 'c'   conjg( A' )*x = b.   

             Unchanged on exit.   

    DIAG   - CHARACTER*1.   
             On entry, DIAG specifies whether or not A is unit   
             triangular as follows:   

                DIAG = 'U' or 'u'   A is assumed to be unit triangular.   

                DIAG = 'N' or 'n'   A is not assumed to be unit   
                                    triangular.   

             Unchanged on exit.   

    N      - INTEGER.   
             On entry, N specifies the order of the matrix A.   
             N must be at least zero.   
             Unchanged on exit.   

    A      - COMPLEX          array of DIMENSION ( LDA, n ).   
             Before entry with  UPLO = 'U' or 'u', the leading n by n   
             upper triangular part of the array A must contain the upper 
  
             triangular matrix and the strictly lower triangular part of 
  
             A is not referenced.   
             Before entry with UPLO = 'L' or 'l', the leading n by n   
             lower triangular part of the array A must contain the lower 
  
             triangular matrix and the strictly upper triangular part of 
  
             A is not referenced.   
             Note that when  DIAG = 'U' or 'u', the diagonal elements of 
  
             A are not referenced either, but are assumed to be unity.   
             Unchanged on exit.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared 
  
             in the calling (sub) program. LDA must be at least   
             max( 1, n ).   
             Unchanged on exit.   

    X      - COMPLEX          array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCX ) ).   
             Before entry, the incremented array X must contain the n   
             element right-hand side vector b. On exit, X is overwritten 
  
             with the solution vector x.   

    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   


    Level 2 Blas routine.   

    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   



       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
#define X(I) x[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    info = 0;
    if (strncmp(uplo, "U", 1)!=0 && strncmp(uplo, "L", 1)!=0) {
	info = 1;
    } else if (strncmp(trans, "N", 1)!=0 && strncmp(trans, "T", 1)!=0 &&
	       strncmp(trans, "C", 1)!=0) {
	info = 2;
    } else if (strncmp(diag, "U", 1)!=0 && strncmp(diag, "N", 1)!=0) {
	info = 3;
    } else if (*n < 0) {
	info = 4;
    } else if (*lda < max(1,*n)) {
	info = 6;
    } else if (*incx == 0) {
	info = 8;
    }
    if (info != 0) {
	input_error_dist("CTRSV ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    }

    noconj = (strncmp(trans, "T", 1)==0);
    nounit = (strncmp(diag, "N", 1)==0);

/*     Set up the start point in X if the increment is not unity. This   
       will be  ( N - 1 )*INCX  too small for descending loops. */

    if (*incx <= 0) {
	kx = 1 - (*n - 1) * *incx;
    } else if (*incx != 1) {
	kx = 1;
    }

/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A. */

    if (strncmp(trans, "N", 1)==0) {

/*        Form  x := inv( A )*x. */

	if (strncmp(uplo, "U", 1)==0) {
	    if (*incx == 1) {
		for (j = *n; j >= 1; --j) {
		    i__1 = j;
		    if (X(j).r != 0.f || X(j).i != 0.f) {
			if (nounit) {
			    i__1 = j;
			    c_div(&q__1, &X(j), &A(j,j));
			    X(j).r = q__1.r, X(j).i = q__1.i;
			}
			i__1 = j;
			temp.r = X(j).r, temp.i = X(j).i;
			for (i = j - 1; i >= 1; --i) {
			    i__1 = i;
			    i__2 = i;
			    i__3 = i + j * a_dim1;
			    q__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, 
				    q__2.i = temp.r * A(i,j).i + temp.i * A(i,j).r;
			    q__1.r = X(i).r - q__2.r, q__1.i = X(i).i - 
				    q__2.i;
			    X(i).r = q__1.r, X(i).i = q__1.i;
/* L10: */
			}
		    }
/* L20: */
		}
	    } else {
		jx = kx + (*n - 1) * *incx;
		for (j = *n; j >= 1; --j) {
		    i__1 = jx;
		    if (X(jx).r != 0.f || X(jx).i != 0.f) {
			if (nounit) {
			    i__1 = jx;
			    c_div(&q__1, &X(jx), &A(j,j));
			    X(jx).r = q__1.r, X(jx).i = q__1.i;
			}
			i__1 = jx;
			temp.r = X(jx).r, temp.i = X(jx).i;
			ix = jx;
			for (i = j - 1; i >= 1; --i) {
			    ix -= *incx;
			    i__1 = ix;
			    i__2 = ix;
			    i__3 = i + j * a_dim1;
			    q__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, 
				    q__2.i = temp.r * A(i,j).i + temp.i * A(i,j).r;
			    q__1.r = X(ix).r - q__2.r, q__1.i = X(ix).i - 
				    q__2.i;
			    X(ix).r = q__1.r, X(ix).i = q__1.i;
/* L30: */
			}
		    }
		    jx -= *incx;
/* L40: */
		}
	    }
	} else {
	    if (*incx == 1) {
		i__1 = *n;
		for (j = 1; j <= *n; ++j) {
		    i__2 = j;
		    if (X(j).r != 0.f || X(j).i != 0.f) {
			if (nounit) {
			    i__2 = j;
			    c_div(&q__1, &X(j), &A(j,j));
			    X(j).r = q__1.r, X(j).i = q__1.i;
			}
			i__2 = j;
			temp.r = X(j).r, temp.i = X(j).i;
			i__2 = *n;
			for (i = j + 1; i <= *n; ++i) {
			    i__3 = i;
			    i__4 = i;
			    i__5 = i + j * a_dim1;
			    q__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, 
				    q__2.i = temp.r * A(i,j).i + temp.i * A(i,j).r;
			    q__1.r = X(i).r - q__2.r, q__1.i = X(i).i - 
				    q__2.i;
			    X(i).r = q__1.r, X(i).i = q__1.i;
/* L50: */
			}
		    }
/* L60: */
		}
	    } else {
		jx = kx;
		i__1 = *n;
		for (j = 1; j <= *n; ++j) {
		    i__2 = jx;
		    if (X(jx).r != 0.f || X(jx).i != 0.f) {
			if (nounit) {
			    i__2 = jx;
			    c_div(&q__1, &X(jx), &A(j,j));
			    X(jx).r = q__1.r, X(jx).i = q__1.i;
			}
			i__2 = jx;
			temp.r = X(jx).r, temp.i = X(jx).i;
			ix = jx;
			i__2 = *n;
			for (i = j + 1; i <= *n; ++i) {
			    ix += *incx;
			    i__3 = ix;
			    i__4 = ix;
			    i__5 = i + j * a_dim1;
			    q__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, 
				    q__2.i = temp.r * A(i,j).i + temp.i * A(i,j).r;
			    q__1.r = X(ix).r - q__2.r, q__1.i = X(ix).i - 
				    q__2.i;
			    X(ix).r = q__1.r, X(ix).i = q__1.i;
/* L70: */
			}
		    }
		    jx += *incx;
/* L80: */
		}
	    }
	}
    } else {

/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */

	if (strncmp(uplo, "U", 1)==0) {
	    if (*incx == 1) {
		i__1 = *n;
		for (j = 1; j <= *n; ++j) {
		    i__2 = j;
		    temp.r = X(j).r, temp.i = X(j).i;
		    if (noconj) {
			i__2 = j - 1;
			for (i = 1; i <= j-1; ++i) {
			    i__3 = i + j * a_dim1;
			    i__4 = i;
			    q__2.r = A(i,j).r * X(i).r - A(i,j).i * X(
				    i).i, q__2.i = A(i,j).r * X(i).i + 
				    A(i,j).i * X(i).r;
			    q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
				    q__2.i;
			    temp.r = q__1.r, temp.i = q__1.i;
/* L90: */
			}
			if (nounit) {
			    c_div(&q__1, &temp, &A(j,j));
			    temp.r = q__1.r, temp.i = q__1.i;
			}
		    } else {
			i__2 = j - 1;
			for (i = 1; i <= j-1; ++i) {
			    r_cnjg(&q__3, &A(i,j));
			    i__3 = i;
			    q__2.r = q__3.r * X(i).r - q__3.i * X(i).i, 
				    q__2.i = q__3.r * X(i).i + q__3.i * X(
				    i).r;
			    q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
				    q__2.i;
			    temp.r = q__1.r, temp.i = q__1.i;
/* L100: */
			}
			if (nounit) {
			    r_cnjg(&q__2, &A(j,j));
			    c_div(&q__1, &temp, &q__2);
			    temp.r = q__1.r, temp.i = q__1.i;
			}
		    }
		    i__2 = j;
		    X(j).r = temp.r, X(j).i = temp.i;
/* L110: */
		}
	    } else {
		jx = kx;
		i__1 = *n;
		for (j = 1; j <= *n; ++j) {
		    ix = kx;
		    i__2 = jx;
		    temp.r = X(jx).r, temp.i = X(jx).i;
		    if (noconj) {
			i__2 = j - 1;
			for (i = 1; i <= j-1; ++i) {
			    i__3 = i + j * a_dim1;
			    i__4 = ix;
			    q__2.r = A(i,j).r * X(ix).r - A(i,j).i * X(
				    ix).i, q__2.i = A(i,j).r * X(ix).i + 
				    A(i,j).i * X(ix).r;
			    q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
				    q__2.i;
			    temp.r = q__1.r, temp.i = q__1.i;
			    ix += *incx;
/* L120: */
			}
			if (nounit) {
			    c_div(&q__1, &temp, &A(j,j));
			    temp.r = q__1.r, temp.i = q__1.i;
			}
		    } else {
			i__2 = j - 1;
			for (i = 1; i <= j-1; ++i) {
			    r_cnjg(&q__3, &A(i,j));
			    i__3 = ix;
			    q__2.r = q__3.r * X(ix).r - q__3.i * X(ix).i, 
				    q__2.i = q__3.r * X(ix).i + q__3.i * X(
				    ix).r;
			    q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
				    q__2.i;
			    temp.r = q__1.r, temp.i = q__1.i;
			    ix += *incx;
/* L130: */
			}
			if (nounit) {
			    r_cnjg(&q__2, &A(j,j));
			    c_div(&q__1, &temp, &q__2);
			    temp.r = q__1.r, temp.i = q__1.i;
			}
		    }
		    i__2 = jx;
		    X(jx).r = temp.r, X(jx).i = temp.i;
		    jx += *incx;
/* L140: */
		}
	    }
	} else {
	    if (*incx == 1) {
		for (j = *n; j >= 1; --j) {
		    i__1 = j;
		    temp.r = X(j).r, temp.i = X(j).i;
		    if (noconj) {
			i__1 = j + 1;
			for (i = *n; i >= j+1; --i) {
			    i__2 = i + j * a_dim1;
			    i__3 = i;
			    q__2.r = A(i,j).r * X(i).r - A(i,j).i * X(
				    i).i, q__2.i = A(i,j).r * X(i).i + 
				    A(i,j).i * X(i).r;
			    q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
				    q__2.i;
			    temp.r = q__1.r, temp.i = q__1.i;
/* L150: */
			}
			if (nounit) {
			    c_div(&q__1, &temp, &A(j,j));
			    temp.r = q__1.r, temp.i = q__1.i;
			}
		    } else {
			i__1 = j + 1;
			for (i = *n; i >= j+1; --i) {
			    r_cnjg(&q__3, &A(i,j));
			    i__2 = i;
			    q__2.r = q__3.r * X(i).r - q__3.i * X(i).i, 
				    q__2.i = q__3.r * X(i).i + q__3.i * X(
				    i).r;
			    q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
				    q__2.i;
			    temp.r = q__1.r, temp.i = q__1.i;
/* L160: */
			}
			if (nounit) {
			    r_cnjg(&q__2, &A(j,j));
			    c_div(&q__1, &temp, &q__2);
			    temp.r = q__1.r, temp.i = q__1.i;
			}
		    }
		    i__1 = j;
		    X(j).r = temp.r, X(j).i = temp.i;
/* L170: */
		}
	    } else {
		kx += (*n - 1) * *incx;
		jx = kx;
		for (j = *n; j >= 1; --j) {
		    ix = kx;
		    i__1 = jx;
		    temp.r = X(jx).r, temp.i = X(jx).i;
		    if (noconj) {
			i__1 = j + 1;
			for (i = *n; i >= j+1; --i) {
			    i__2 = i + j * a_dim1;
			    i__3 = ix;
			    q__2.r = A(i,j).r * X(ix).r - A(i,j).i * X(
				    ix).i, q__2.i = A(i,j).r * X(ix).i + 
				    A(i,j).i * X(ix).r;
			    q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
				    q__2.i;
			    temp.r = q__1.r, temp.i = q__1.i;
			    ix -= *incx;
/* L180: */
			}
			if (nounit) {
			    c_div(&q__1, &temp, &A(j,j));
			    temp.r = q__1.r, temp.i = q__1.i;
			}
		    } else {
			i__1 = j + 1;
			for (i = *n; i >= j+1; --i) {
			    r_cnjg(&q__3, &A(i,j));
			    i__2 = ix;
			    q__2.r = q__3.r * X(ix).r - q__3.i * X(ix).i, 
				    q__2.i = q__3.r * X(ix).i + q__3.i * X(
				    ix).r;
			    q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
				    q__2.i;
			    temp.r = q__1.r, temp.i = q__1.i;
			    ix -= *incx;
/* L190: */
			}
			if (nounit) {
			    r_cnjg(&q__2, &A(j,j));
			    c_div(&q__1, &temp, &q__2);
			    temp.r = q__1.r, temp.i = q__1.i;
			}
		    }
		    i__1 = jx;
		    X(jx).r = temp.r, X(jx).i = temp.i;
		    jx -= *incx;
/* L200: */
		}
	    }
	}
    }

    return 0;

/*     End of CTRSV . */

} /* ctrsv_ */

SuperLU_DIST_5.3.0/CBLAS/f2c.h0000644013363400111340000000173013233431301014304 0ustar  xiaoyessg/* f2c.h  --  Standard Fortran to C header file */

/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

/* Include guard: everything below is defined at most once per translation
   unit. */
#ifndef F2C_INCLUDE
#define F2C_INCLUDE

/* C spellings of the Fortran scalar types used by the translated sources. */
typedef int integer;
typedef int logical;

typedef char *address;
typedef short int shortint;
typedef float real;
typedef double doublereal;
/* Complex numbers are plain structs: r is the real part, i the imaginary
   part (single and double precision respectively). */
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
typedef short int shortlogical;
typedef char logical1;
typedef char integer1;
/* typedef long long longint; */ /* system-dependent */

/* Truth values for the `logical` type. */
#define TRUE_ (1)
#define FALSE_ (0)

/* Extern is for use with -E */
#ifndef Extern
#define Extern extern
#endif

/* Function-like helper macros.  Each one expands its arguments more than
   once, so arguments must be free of side effects.  abs/min/max yield a
   value of the (promoted) argument type; the d-prefixed variants cast that
   result to doublereal. */
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (doublereal)abs(x)
#define min(a,b) ((a) <= (b) ? (a) : (b))
#define max(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (doublereal)min(a,b)
#define dmax(a,b) (doublereal)max(a,b)

/* Spelling used by the translated code for functions that return their
   result through a pointer argument instead of a return value. */
#define VOID void

#endif
SuperLU_DIST_5.3.0/CBLAS/icamax.c0000644013363400111340000000324513233431301015072 0ustar  xiaoyessg#include "f2c.h"

integer icamax_(integer *n, complex *cx, integer *incx)
{
    /* System generated locals */
    integer ret_val, i__1, i__2;
    real r__1, r__2;
    /* Builtin functions */
    double r_imag(complex *);
    /* Local variables */
    static real smax;
    static integer i, ix;
/*     finds the index of element having max. absolute value.   
       jack dongarra, linpack, 3/11/78.   
       modified 3/93 to return if incx .le. 0.   
       modified 12/3/93, array(1) declarations changed to array(*)   
    
   Parameter adjustments   
       Function Body */
#define CX(I) cx[(I)-1]
    ret_val = 0;
    if (*n < 1 || *incx <= 0) {
	return ret_val;
    }
    ret_val = 1;
    if (*n == 1) {
	return ret_val;
    }
    if (*incx == 1) {
	goto L20;
    }
/*        code for increment not equal to 1 */
    ix = 1;
    smax = (r__1 = CX(1).r, dabs(r__1)) + (r__2 = r_imag(&CX(1)), dabs(r__2));
    ix += *incx;
    i__1 = *n;
    for (i = 2; i <= *n; ++i) {
	i__2 = ix;
	if ((r__1 = CX(ix).r, dabs(r__1)) + (r__2 = r_imag(&CX(ix)), dabs(
		r__2)) <= smax) {
	    goto L5;
	}
	ret_val = i;
	i__2 = ix;
	smax = (r__1 = CX(ix).r, dabs(r__1)) + (r__2 = r_imag(&CX(ix)), 
		dabs(r__2));
L5:
	ix += *incx;
/* L10: */
    }
    return ret_val;
/*        code for increment equal to 1 */
L20:
    smax = (r__1 = CX(1).r, dabs(r__1)) + (r__2 = r_imag(&CX(1)), dabs(r__2));
    i__1 = *n;
    for (i = 2; i <= *n; ++i) {
	i__2 = i;
	if ((r__1 = CX(i).r, dabs(r__1)) + (r__2 = r_imag(&CX(i)), dabs(
		r__2)) <= smax) {
	    goto L30;
	}
	ret_val = i;
	i__2 = i;
	smax = (r__1 = CX(i).r, dabs(r__1)) + (r__2 = r_imag(&CX(i)), dabs(
		r__2));
L30:
	;
    }
    return ret_val;
} /* icamax_ */


SuperLU_DIST_5.3.0/CBLAS/dcabs1.c0000644013363400111340000000102013233431301014752 0ustar  xiaoyessg/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Returns |Re(z)| + |Im(z)| for a double-precision complex number.

   The parts are read directly from the struct fields; no static scratch
   buffer or pointer reinterpretation is involved, so the routine keeps no
   state between calls and does not depend on the struct's memory layout. */
doublereal dcabs1_(doublecomplex *z)
{
    doublereal re = z->r;
    doublereal im = z->i;

    return (re >= 0 ? re : -re) + (im >= 0 ? im : -im);
} /* dcabs1_ */


SuperLU_DIST_5.3.0/CBLAS/dnrm2.c0000644013363400111340000000324213233431301014647 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

doublereal dnrm2_(integer *n, doublereal *x, integer *incx)
{
    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables (automatic: no state is kept between calls) */
    doublereal scale, ssq, absxi, ratio;
    integer k, last;

/*  DNRM2 returns the euclidean norm of a vector via the function
    name, so that

       DNRM2 := sqrt( x'*x )

    n     (input)  number of elements.
    x     (input)  the vector; elements are visited incx apart.
    incx  (input)  stride between consecutive elements.

    Returns 0 when n < 1 or incx < 1, and |x(1)| when n == 1.

    -- This version written on 25-October-1982.
       Modified on 14-October-1993 to inline the call to DLASSQ.
       Sven Hammarling, Nag Ltd.
*/

    if (*n < 1 || *incx < 1) {
        return 0.;
    }
    if (*n == 1) {
        return x[0] >= 0 ? x[0] : -x[0];
    }

    /* The loop below is the inlined equivalent of the LAPACK auxiliary
       call DLASSQ( N, X, INCX, SCALE, SSQ ): after each step
       scale*scale*ssq equals the sum of squares of the elements seen so
       far, with scale the largest magnitude among them. */
    scale = 0.;
    ssq = 1.;
    last = (*n - 1) * *incx;            /* zero-based offset of last element */
    for (k = 0; k <= last; k += *incx) {
        if (x[k] != 0.) {
            absxi = x[k] >= 0 ? x[k] : -x[k];
            if (scale < absxi) {
                /* New largest magnitude: rescale the running sum. */
                ratio = scale / absxi;
                ssq = ssq * (ratio * ratio) + 1.;
                scale = absxi;
            } else {
                ratio = absxi / scale;
                ssq += ratio * ratio;
            }
        }
    }
    return scale * sqrt(ssq);

/*     End of DNRM2. */

} /* dnrm2_ */

SuperLU_DIST_5.3.0/CBLAS/sasum.c0000644013363400111340000000344313233431301014760 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

real sasum_(integer *n, real *sx, integer *incx)
{


    /* System generated locals */
    integer i__1, i__2;
    real ret_val, r__1, r__2, r__3, r__4, r__5, r__6;

    /* Local variables */
    static integer i, m, nincx;
    static real stemp;
    static integer mp1;


/*     takes the sum of the absolute values.   
       uses unrolled loops for increment equal to one.   
       jack dongarra, linpack, 3/11/78.   
       modified 3/93 to return if incx .le. 0.   
       modified 12/3/93, array(1) declarations changed to array(*)   


    
   Parameter adjustments   
       Function Body */
#define SX(I) sx[(I)-1]


    ret_val = 0.f;
    stemp = 0.f;
    if (*n <= 0 || *incx <= 0) {
	return ret_val;
    }
    if (*incx == 1) {
	goto L20;
    }

/*        code for increment not equal to 1 */

    nincx = *n * *incx;
    i__1 = nincx;
    i__2 = *incx;
    for (i = 1; *incx < 0 ? i >= nincx : i <= nincx; i += *incx) {
	stemp += (r__1 = SX(i), dabs(r__1));
/* L10: */
    }
    ret_val = stemp;
    return ret_val;

/*        code for increment equal to 1   


          clean-up loop */

L20:
    m = *n % 6;
    if (m == 0) {
	goto L40;
    }
    i__2 = m;
    for (i = 1; i <= m; ++i) {
	stemp += (r__1 = SX(i), dabs(r__1));
/* L30: */
    }
    if (*n < 6) {
	goto L60;
    }
L40:
    mp1 = m + 1;
    i__2 = *n;
    for (i = mp1; i <= *n; i += 6) {
	stemp = stemp + (r__1 = SX(i), dabs(r__1)) + (r__2 = SX(i + 1), dabs(
		r__2)) + (r__3 = SX(i + 2), dabs(r__3)) + (r__4 = SX(i + 3), 
		dabs(r__4)) + (r__5 = SX(i + 4), dabs(r__5)) + (r__6 = SX(i + 
		5), dabs(r__6));
/* L50: */
    }
L60:
    ret_val = stemp;
    return ret_val;
} /* sasum_ */

SuperLU_DIST_5.3.0/CBLAS/saxpy.c0000644013363400111340000000324313233431301014772 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx, 
	real *sy, integer *incy)
{
/*     constant times a vector plus a vector:  sy := sy + sa * sx.
       jack dongarra, linpack, 3/11/78.

       n     (input)   number of elements.
       sa    (input)   the scalar multiplier.
       sx    (input)   source vector, elements incx apart.
       incx  (input)   stride of sx; a negative stride walks sx backwards.
       sy    (in/out)  destination vector, elements incy apart.
       incy  (input)   stride of sy; a negative stride walks sy backwards.

       Returns 0 in every case.  Nothing is modified when n <= 0 or
       sa == 0.

       One loop serves every stride combination: each element update is
       independent of the others, so the unit-stride case needs no
       separate hand-unrolled path.  Scratch variables are automatic; no
       state is kept between calls. */
    integer k, ix, iy;

    if (*n <= 0) {
        return 0;
    }
    if (*sa == 0.f) {
        return 0;
    }

    /* Zero-based starting offsets.  With a negative stride the first
       logical element lives at the far end of the array. */
    ix = (*incx < 0) ? (1 - *n) * *incx : 0;
    iy = (*incy < 0) ? (1 - *n) * *incy : 0;

    for (k = 0; k < *n; ++k) {
        sy[iy] += *sa * sx[ix];
        ix += *incx;
        iy += *incy;
    }
    return 0;
} /* saxpy_ */

SuperLU_DIST_5.3.0/CBLAS/scopy.c0000644013363400111340000000321413233431301014761 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy, 
	integer *incy)
{
/*     copies a vector, x, to a vector, y.
       jack dongarra, linpack, 3/11/78.

       n     (input)   number of elements.
       sx    (input)   source vector, elements incx apart.
       incx  (input)   stride of sx; a negative stride walks sx backwards.
       sy    (output)  destination vector, elements incy apart.
       incy  (input)   stride of sy; a negative stride walks sy backwards.

       Returns 0 in every case.  Nothing is copied when n <= 0.

       Elements are copied one at a time in increasing logical order for
       every stride combination, which is also the order the former
       unrolled unit-stride path used.  Scratch variables are automatic;
       no state is kept between calls. */
    integer k, ix, iy;

    if (*n <= 0) {
        return 0;
    }

    /* Zero-based starting offsets.  With a negative stride the first
       logical element lives at the far end of the array. */
    ix = (*incx < 0) ? (1 - *n) * *incx : 0;
    iy = (*incy < 0) ? (1 - *n) * *incy : 0;

    for (k = 0; k < *n; ++k) {
        sy[iy] = sx[ix];
        ix += *incx;
        iy += *incy;
    }
    return 0;
} /* scopy_ */

SuperLU_DIST_5.3.0/CBLAS/zaxpy.c0000644013363400111340000000347113233431301015004 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, 
	integer *incx, doublecomplex *zy, integer *incy)
{
    extern doublereal dcabs1_(doublecomplex *);

/*     constant times a vector plus a vector:  zy := zy + za * zx.
       jack dongarra, 3/11/78.

       n     (input)   number of elements.
       za    (input)   the complex scalar multiplier.
       zx    (input)   source vector, elements incx apart.
       incx  (input)   stride of zx; a negative stride walks zx backwards.
       zy    (in/out)  destination vector, elements incy apart.
       incy  (input)   stride of zy; a negative stride walks zy backwards.

       Returns 0 in every case.  Nothing is modified when n <= 0 or when
       dcabs1_(za) is zero.

       One loop serves every stride combination.  Scratch variables are
       automatic; no state is kept between calls. */
    integer k, ix, iy;
    doublecomplex prod, sum;

    if (*n <= 0) {
        return 0;
    }
    if (dcabs1_(za) == 0.) {
        return 0;
    }

    /* Zero-based starting offsets.  With a negative stride the first
       logical element lives at the far end of the array. */
    ix = (*incx < 0) ? (1 - *n) * *incx : 0;
    iy = (*incy < 0) ? (1 - *n) * *incy : 0;

    for (k = 0; k < *n; ++k) {
        /* Both parts of the result are formed before either is stored. */
        prod.r = za->r * zx[ix].r - za->i * zx[ix].i;
        prod.i = za->r * zx[ix].i + za->i * zx[ix].r;
        sum.r = zy[iy].r + prod.r;
        sum.i = zy[iy].i + prod.i;
        zy[iy].r = sum.r;
        zy[iy].i = sum.i;
        ix += *incx;
        iy += *incy;
    }
    return 0;
} /* zaxpy_ */

SuperLU_DIST_5.3.0/CBLAS/dscal.c0000644013363400111340000000302213233431301014707 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, 
	integer *incx)
{
/*     scales a vector by a constant, in place:  dx(k) := da * dx(k).
       jack dongarra, linpack, 3/11/78.
       modified 3/93 to return if incx .le. 0.

       n     (input)   number of elements to scale.
       da    (input)   the scale factor.
       dx    (in/out)  the vector; n elements are visited, incx apart.
       incx  (input)   stride between consecutive elements of dx.

       Returns 0 in every case.  Nothing is modified when n <= 0 or
       incx <= 0.

       Each element is scaled independently, so a single strided loop
       covers the unit-stride case as well.  Scratch variables are
       automatic; no state is kept between calls. */
    integer k, span;

    if (*n <= 0 || *incx <= 0) {
        return 0;
    }

    /* Zero-based offsets 0, incx, 2*incx, ... strictly below n*incx. */
    span = *n * *incx;
    for (k = 0; k < span; k += *incx) {
        dx[k] = *da * dx[k];
    }
    return 0;
} /* dscal_ */

SuperLU_DIST_5.3.0/CBLAS/scasum.c0000644013363400111340000000301613233431301015117 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

real scasum_(integer *n, complex *cx, integer *incx)
{
    /* Builtin functions */
    double r_imag(complex *);

/*     takes the sum of the absolute values of a complex vector and
       returns a single precision result.  The "absolute value" of an
       element is |re| + |im|.
       jack dongarra, linpack, 3/11/78.
       modified 3/93 to return if incx .le. 0.

       n     (input)  number of elements.
       cx    (input)  the vector; elements are visited incx apart.
       incx  (input)  stride between consecutive elements.

       Returns the sum, or 0 when n <= 0 or incx <= 0.

       Per element, the two magnitudes are widened to double precision,
       added to the running total, and the total is rounded back to
       single precision.  Scratch variables are automatic; no state is
       kept between calls. */
    integer k, span;
    real stemp, re, im;

    stemp = 0.f;
    if (*n <= 0 || *incx <= 0) {
        return 0.f;
    }

    /* Zero-based offsets 0, incx, 2*incx, ... strictly below n*incx;
       with incx == 1 this is every element in order. */
    span = *n * *incx;
    for (k = 0; k < span; k += *incx) {
        re = cx[k].r;
        im = r_imag(&cx[k]);    /* rounded to single precision on store */
        stemp = stemp + (doublereal)(re >= 0 ? re : -re)
                      + (doublereal)(im >= 0 ? im : -im);
    }
    return stemp;
} /* scasum_ */

SuperLU_DIST_5.3.0/CBLAS/zcopy.c0000644013363400111340000000253313233431301014773 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int zcopy_(integer *n, doublecomplex *zx, integer *incx, 
	doublecomplex *zy, integer *incy)
{
/*     copies a vector, x, to a vector, y.
       jack dongarra, linpack, 4/11/78.

       n     (input)   number of elements.
       zx    (input)   source vector, elements incx apart.
       incx  (input)   stride of zx; a negative stride walks zx backwards.
       zy    (output)  destination vector, elements incy apart.
       incy  (input)   stride of zy; a negative stride walks zy backwards.

       Returns 0 in every case.  Nothing is copied when n <= 0.

       One loop serves every stride combination.  Scratch variables are
       automatic; no state is kept between calls. */
    integer k, ix, iy;

    if (*n <= 0) {
        return 0;
    }

    /* Zero-based starting offsets.  With a negative stride the first
       logical element lives at the far end of the array. */
    ix = (*incx < 0) ? (1 - *n) * *incx : 0;
    iy = (*incy < 0) ? (1 - *n) * *incy : 0;

    for (k = 0; k < *n; ++k) {
        zy[iy].r = zx[ix].r, zy[iy].i = zx[ix].i;
        ix += *incx;
        iy += *incy;
    }
    return 0;
} /* zcopy_ */

SuperLU_DIST_5.3.0/CBLAS/zdotc.c0000644013363400111340000000373413233431301014756 0ustar  xiaoyessg/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Double Complex */ VOID zdotc_(doublecomplex * ret_val, integer *n, 
	doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy)
{
    /* Builtin functions */
    void d_cnjg(doublecomplex *, doublecomplex *);

/*     forms the dot product of a vector:  sum of conjg(zx(k)) * zy(k).
       jack dongarra, 3/11/78.

       ret_val (output)  receives the result; it is set to zero first, so
                         it holds 0 when n <= 0.
       n       (input)   number of elements.
       zx      (input)   first vector (conjugated), elements incx apart.
       incx    (input)   stride of zx; negative walks zx backwards.
       zy      (input)   second vector, elements incy apart.
       incy    (input)   stride of zy; negative walks zy backwards.

       The arrays are addressed with zero-based offsets from zx and zy
       themselves; no pointer before the start of either array is ever
       formed.  Scratch variables are automatic, so no state is kept
       between calls. */
    integer k, ix, iy;
    doublecomplex acc, cj, prod;

    acc.r = 0., acc.i = 0.;
    ret_val->r = 0., ret_val->i = 0.;
    if (*n <= 0) {
        return ;
    }

    /* Zero-based starting offsets.  With a negative stride the first
       logical element lives at the far end of the array. */
    ix = (*incx < 0) ? (1 - *n) * *incx : 0;
    iy = (*incy < 0) ? (1 - *n) * *incy : 0;

    for (k = 0; k < *n; ++k) {
        d_cnjg(&cj, &zx[ix]);
        prod.r = cj.r * zy[iy].r - cj.i * zy[iy].i;
        prod.i = cj.r * zy[iy].i + cj.i * zy[iy].r;
        acc.r = acc.r + prod.r;
        acc.i = acc.i + prod.i;
        ix += *incx;
        iy += *incy;
    }
    ret_val->r = acc.r, ret_val->i = acc.i;
    return ;
} /* zdotc_ */

SuperLU_DIST_5.3.0/CBLAS/dsyr2.c0000644013363400111340000001515313233431301014674 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, 
	doublereal *x, integer *incx, doublereal *y, integer *incy, 
	doublereal *a, integer *lda)
{
/*  Purpose
    =======

    DSYR2  performs the symmetric rank 2 operation

       A := alpha*x*y' + alpha*y*x' + A,

    where alpha is a scalar, x and y are n element vectors and A is an
    n by n symmetric matrix of which only one triangle is stored.

    Parameters
    ==========

    UPLO   - CHARACTER*1.
             Selects the triangle of A that is referenced and updated.
             Only the first character is examined:

                UPLO = 'U' or 'u'   Only the upper triangular part of A
                                    is referenced.

                UPLO = 'L' or 'l'   Only the lower triangular part of A
                                    is referenced.

             Unchanged on exit.

    N      - INTEGER.
             The order of the matrix A. N must be at least zero.
             Unchanged on exit.

    ALPHA  - DOUBLE PRECISION.
             The scalar alpha.
             Unchanged on exit.

    X      - DOUBLE PRECISION array of dimension at least
             ( 1 + ( n - 1 )*abs( INCX ) ).
             The incremented array X must contain the n element
             vector x.
             Unchanged on exit.

    INCX   - INTEGER.
             The increment for the elements of X. Must not be zero.
             Unchanged on exit.

    Y      - DOUBLE PRECISION array of dimension at least
             ( 1 + ( n - 1 )*abs( INCY ) ).
             The incremented array Y must contain the n element
             vector y.
             Unchanged on exit.

    INCY   - INTEGER.
             The increment for the elements of Y. Must not be zero.
             Unchanged on exit.

    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ), stored
             column by column.
             With UPLO = 'U' or 'u' the leading n by n upper triangular
             part must contain the upper triangle of the symmetric
             matrix; the strictly lower triangle is not referenced.
             With UPLO = 'L' or 'l' the leading n by n lower triangular
             part must contain the lower triangle of the symmetric
             matrix; the strictly upper triangle is not referenced.
             On exit the selected triangle is overwritten by the
             corresponding triangle of the updated matrix.

    LDA    - INTEGER.
             The first dimension of A as declared in the calling
             (sub) program. LDA must be at least max( 1, n ).
             Unchanged on exit.

    Return value
    ============

    Always 0. When an argument is invalid, input_error_dist is called
    with the routine name "DSYR2 " and the 1-based position of the
    first offending argument, and A is left untouched.


    Level 2 Blas routine.

    -- Written on 22-October-1986.
       Jack Dongarra, Argonne National Lab.
       Jeremy Du Croz, Nag Central Office.
       Sven Hammarling, Nag Central Office.
       Richard Hanson, Sandia National Labs.
*/

    extern /* Subroutine */ int input_error_dist(char *, integer *);

    /* Scratch variables are automatic (f2c emitted them as static) so
       that concurrent calls do not share state. */
    integer info;
    doublereal temp1, temp2;
    integer i, j;
    integer ix, iy, jx, jy, kx, ky;
    int upper, lower;

/* 1-based accessors mirroring the Fortran indexing. */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

/*     Decode UPLO once; both letter cases are valid, as documented. */

    upper = (*uplo == 'U' || *uplo == 'u');
    lower = (*uplo == 'L' || *uplo == 'l');

/*     Test the input parameters. */

    info = 0;
    if (!upper && !lower) {
	info = 1;
    } else if (*n < 0) {
	info = 2;
    } else if (*incx == 0) {
	info = 5;
    } else if (*incy == 0) {
	info = 7;
    } else if (*lda < max(1,*n)) {
	info = 9;
    }
    if (info != 0) {
	input_error_dist("DSYR2 ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0 || *alpha == 0.) {
	return 0;
    }

/*     Set up the start points in X and Y. A negative increment walks
       the vector backwards, so the first logical element sits at the
       far end of the storage. The values are only consumed by the
       non-unit-increment branches below. */

    kx = (*incx > 0) ? 1 : 1 - (*n - 1) * *incx;
    ky = (*incy > 0) ? 1 : 1 - (*n - 1) * *incy;
    jx = kx;
    jy = ky;

/*     Start the operations. In this version the elements of A are
       accessed sequentially with one pass through the triangular part
       of A. */

    if (upper) {

/*        Form  A  when A is stored in the upper triangle. */

	if (*incx == 1 && *incy == 1) {
	    for (j = 1; j <= *n; ++j) {
		/* Column j is unchanged when x(j) and y(j) are both zero. */
		if (X(j) != 0. || Y(j) != 0.) {
		    temp1 = *alpha * Y(j);
		    temp2 = *alpha * X(j);
		    for (i = 1; i <= j; ++i) {
			A(i,j) = A(i,j) + X(i) * temp1 
				+ Y(i) * temp2;
		    }
		}
	    }
	} else {
	    for (j = 1; j <= *n; ++j) {
		if (X(jx) != 0. || Y(jy) != 0.) {
		    temp1 = *alpha * Y(jy);
		    temp2 = *alpha * X(jx);
		    /* Rows 1..j: restart from the first vector element. */
		    ix = kx;
		    iy = ky;
		    for (i = 1; i <= j; ++i) {
			A(i,j) = A(i,j) + X(ix) * temp1 
				+ Y(iy) * temp2;
			ix += *incx;
			iy += *incy;
		    }
		}
		jx += *incx;
		jy += *incy;
	    }
	}
    } else {

/*        Form  A  when A is stored in the lower triangle. */

	if (*incx == 1 && *incy == 1) {
	    for (j = 1; j <= *n; ++j) {
		if (X(j) != 0. || Y(j) != 0.) {
		    temp1 = *alpha * Y(j);
		    temp2 = *alpha * X(j);
		    for (i = j; i <= *n; ++i) {
			A(i,j) = A(i,j) + X(i) * temp1 
				+ Y(i) * temp2;
		    }
		}
	    }
	} else {
	    for (j = 1; j <= *n; ++j) {
		if (X(jx) != 0. || Y(jy) != 0.) {
		    temp1 = *alpha * Y(jy);
		    temp2 = *alpha * X(jx);
		    /* Rows j..n: start at the diagonal element. */
		    ix = jx;
		    iy = jy;
		    for (i = j; i <= *n; ++i) {
			A(i,j) = A(i,j) + X(ix) * temp1 
				+ Y(iy) * temp2;
			ix += *incx;
			iy += *incy;
		    }
		}
		jx += *incx;
		jy += *incy;
	    }
	}
    }

    return 0;

/*     End of DSYR2 . */

} /* dsyr2_ */

SuperLU_DIST_5.3.0/CBLAS/dsymv.c0000644013363400111340000001615713233431301015000 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, 
	doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal 
	*beta, doublereal *y, integer *incy)
{
/*  Purpose
    =======

    DSYMV  performs the matrix-vector  operation

       y := alpha*A*x + beta*y,

    where alpha and beta are scalars, x and y are n element vectors and
    A is an n by n symmetric matrix of which only one triangle is
    stored.

    Parameters
    ==========

    UPLO   - CHARACTER*1.
             Selects the triangle of A that is referenced. Only the
             first character is examined:

                UPLO = 'U' or 'u'   Only the upper triangular part of A
                                    is referenced.

                UPLO = 'L' or 'l'   Only the lower triangular part of A
                                    is referenced.

             Unchanged on exit.

    N      - INTEGER.
             The order of the matrix A. N must be at least zero.
             Unchanged on exit.

    ALPHA  - DOUBLE PRECISION.
             The scalar alpha.
             Unchanged on exit.

    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ), stored
             column by column.
             With UPLO = 'U' or 'u' the leading n by n upper triangular
             part must contain the upper triangle of the symmetric
             matrix; the strictly lower triangle is not referenced.
             With UPLO = 'L' or 'l' the leading n by n lower triangular
             part must contain the lower triangle of the symmetric
             matrix; the strictly upper triangle is not referenced.
             Unchanged on exit.

    LDA    - INTEGER.
             The first dimension of A as declared in the calling
             (sub) program. LDA must be at least max( 1, n ).
             Unchanged on exit.

    X      - DOUBLE PRECISION array of dimension at least
             ( 1 + ( n - 1 )*abs( INCX ) ).
             The incremented array X must contain the n element
             vector x.
             Unchanged on exit.

    INCX   - INTEGER.
             The increment for the elements of X. Must not be zero.
             Unchanged on exit.

    BETA   - DOUBLE PRECISION.
             The scalar beta. When BETA is supplied as zero then Y
             need not be set on input (it is overwritten with zeros
             rather than multiplied).
             Unchanged on exit.

    Y      - DOUBLE PRECISION array of dimension at least
             ( 1 + ( n - 1 )*abs( INCY ) ).
             Before entry, the incremented array Y must contain the n
             element vector y. On exit, Y is overwritten by the updated
             vector y.

    INCY   - INTEGER.
             The increment for the elements of Y. Must not be zero.
             Unchanged on exit.

    Return value
    ============

    Always 0. When an argument is invalid, input_error_dist is called
    with the routine name "DSYMV " and the 1-based position of the
    first offending argument, and Y is left untouched.


    Level 2 Blas routine.

    -- Written on 22-October-1986.
       Jack Dongarra, Argonne National Lab.
       Jeremy Du Croz, Nag Central Office.
       Sven Hammarling, Nag Central Office.
       Richard Hanson, Sandia National Labs.
*/

    extern /* Subroutine */ int input_error_dist(char *, integer *);

    /* Scratch variables are automatic (f2c emitted them as static) so
       that concurrent calls do not share state. */
    integer info;
    doublereal temp1, temp2;
    integer i, j;
    integer ix, iy, jx, jy, kx, ky;
    int upper, lower;

/* 1-based accessors mirroring the Fortran indexing. */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

/*     Decode UPLO once; both letter cases are valid, as documented. */

    upper = (*uplo == 'U' || *uplo == 'u');
    lower = (*uplo == 'L' || *uplo == 'l');

/*     Test the input parameters. */

    info = 0;
    if (!upper && !lower) {
	info = 1;
    } else if (*n < 0) {
	info = 2;
    } else if (*lda < max(1,*n)) {
	info = 5;
    } else if (*incx == 0) {
	info = 7;
    } else if (*incy == 0) {
	info = 10;
    }
    if (info != 0) {
	input_error_dist("DSYMV ", &info);
	return 0;
    }

/*     Quick return if possible: nothing to do for an empty problem or
       when the operation reduces to y := y. */

    if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
	return 0;
    }

/*     Set up the start points in  X  and  Y. A negative increment
       walks the vector backwards, so the first logical element sits
       at the far end of the storage. */

    kx = (*incx > 0) ? 1 : 1 - (*n - 1) * *incx;
    ky = (*incy > 0) ? 1 : 1 - (*n - 1) * *incy;

/*     Start the operations. In this version the elements of A are
       accessed sequentially with one pass through the triangular part
       of A.

       First form  y := beta*y. A zero beta stores zeros instead of
       multiplying, so the incoming contents of Y are never read. */

    if (*beta != 1.) {
	iy = ky;
	if (*incy == 1) {
	    if (*beta == 0.) {
		for (i = 1; i <= *n; ++i) {
		    Y(i) = 0.;
		}
	    } else {
		for (i = 1; i <= *n; ++i) {
		    Y(i) = *beta * Y(i);
		}
	    }
	} else if (*beta == 0.) {
	    for (i = 1; i <= *n; ++i) {
		Y(iy) = 0.;
		iy += *incy;
	    }
	} else {
	    for (i = 1; i <= *n; ++i) {
		Y(iy) = *beta * Y(iy);
		iy += *incy;
	    }
	}
    }
    if (*alpha == 0.) {
	return 0;
    }

    /* In every branch below, temp1 scatters column j of the stored
       triangle into y, while temp2 gathers the mirrored row j
       contribution so the unstored triangle is never touched. */

    if (upper) {

/*        Form  y  when A is stored in upper triangle. */

	if (*incx == 1 && *incy == 1) {
	    for (j = 1; j <= *n; ++j) {
		temp1 = *alpha * X(j);
		temp2 = 0.;
		for (i = 1; i < j; ++i) {
		    Y(i) += temp1 * A(i,j);
		    temp2 += A(i,j) * X(i);
		}
		Y(j) = Y(j) + temp1 * A(j,j) + *alpha * temp2;
	    }
	} else {
	    jx = kx;
	    jy = ky;
	    for (j = 1; j <= *n; ++j) {
		temp1 = *alpha * X(jx);
		temp2 = 0.;
		ix = kx;
		iy = ky;
		for (i = 1; i < j; ++i) {
		    Y(iy) += temp1 * A(i,j);
		    temp2 += A(i,j) * X(ix);
		    ix += *incx;
		    iy += *incy;
		}
		Y(jy) = Y(jy) + temp1 * A(j,j) + *alpha * temp2;
		jx += *incx;
		jy += *incy;
	    }
	}
    } else {

/*        Form  y  when A is stored in lower triangle. */

	if (*incx == 1 && *incy == 1) {
	    for (j = 1; j <= *n; ++j) {
		temp1 = *alpha * X(j);
		temp2 = 0.;
		Y(j) += temp1 * A(j,j);
		for (i = j + 1; i <= *n; ++i) {
		    Y(i) += temp1 * A(i,j);
		    temp2 += A(i,j) * X(i);
		}
		Y(j) += *alpha * temp2;
	    }
	} else {
	    jx = kx;
	    jy = ky;
	    for (j = 1; j <= *n; ++j) {
		temp1 = *alpha * X(jx);
		temp2 = 0.;
		Y(jy) += temp1 * A(j,j);
		/* Start on the diagonal; the loop steps to row j+1
		   before the first use. */
		ix = jx;
		iy = jy;
		for (i = j + 1; i <= *n; ++i) {
		    ix += *incx;
		    iy += *incy;
		    Y(iy) += temp1 * A(i,j);
		    temp2 += A(i,j) * X(ix);
		}
		Y(jy) += *alpha * temp2;
		jx += *incx;
		jy += *incy;
	    }
	}
    }

    return 0;

/*     End of DSYMV . */

} /* dsymv_ */

SuperLU_DIST_5.3.0/CBLAS/sgemv.c0000644013363400111340000001537113233431301014754 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, 
	real *a, integer *lda, real *x, integer *incx, real *beta, real *y, 
	integer *incy)
{
/*  Purpose
    =======

    SGEMV  performs one of the matrix-vector operations

       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,

    where alpha and beta are scalars, x and y are vectors and A is an
    m by n matrix.

    Parameters
    ==========

    TRANS  - CHARACTER*1.
             Selects the operation to be performed. Only the first
             character is examined:

                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.

                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.

                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.

             Unchanged on exit.

    M      - INTEGER.
             The number of rows of the matrix A. M must be at least
             zero.
             Unchanged on exit.

    N      - INTEGER.
             The number of columns of the matrix A. N must be at least
             zero.
             Unchanged on exit.

    ALPHA  - REAL.
             The scalar alpha.
             Unchanged on exit.

    A      - REAL array of DIMENSION ( LDA, n ), stored column by
             column.
             The leading m by n part of the array A must contain the
             matrix of coefficients.
             Unchanged on exit.

    LDA    - INTEGER.
             The first dimension of A as declared in the calling
             (sub) program. LDA must be at least max( 1, m ).
             Unchanged on exit.

    X      - REAL array of DIMENSION at least
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
             and at least
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
             The incremented array X must contain the vector x.
             Unchanged on exit.

    INCX   - INTEGER.
             The increment for the elements of X. Must not be zero.
             Unchanged on exit.

    BETA   - REAL.
             The scalar beta. When BETA is supplied as zero then Y
             need not be set on input (it is overwritten with zeros
             rather than multiplied).
             Unchanged on exit.

    Y      - REAL array of DIMENSION at least
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
             and at least
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
             Before entry with BETA non-zero, the incremented array Y
             must contain the vector y. On exit, Y is overwritten by
             the updated vector y.

    INCY   - INTEGER.
             The increment for the elements of Y. Must not be zero.
             Unchanged on exit.

    Return value
    ============

    Always 0. When an argument is invalid, input_error_dist is called
    with the routine name "SGEMV " and the 1-based position of the
    first offending argument, and Y is left untouched.


    Level 2 Blas routine.

    -- Written on 22-October-1986.
       Jack Dongarra, Argonne National Lab.
       Jeremy Du Croz, Nag Central Office.
       Sven Hammarling, Nag Central Office.
       Richard Hanson, Sandia National Labs.
*/

    extern /* Subroutine */ int input_error_dist(char *, integer *);

    /* Scratch variables are automatic (f2c emitted them as static) so
       that concurrent calls do not share state. */
    integer info;
    real temp;
    integer lenx, leny, i, j;
    integer ix, iy, jx, jy, kx, ky;
    int notrans, transposed;

/* 1-based accessors mirroring the Fortran indexing. */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

/*     Decode TRANS once; both letter cases are valid, as documented.
       'T' and 'C' are equivalent for real data. */

    notrans = (*trans == 'N' || *trans == 'n');
    transposed = (*trans == 'T' || *trans == 't' ||
		  *trans == 'C' || *trans == 'c');

/*     Test the input parameters. */

    info = 0;
    if (!notrans && !transposed) {
	info = 1;
    } else if (*m < 0) {
	info = 2;
    } else if (*n < 0) {
	info = 3;
    } else if (*lda < max(1,*m)) {
	info = 6;
    } else if (*incx == 0) {
	info = 8;
    } else if (*incy == 0) {
	info = 11;
    }
    if (info != 0) {
	input_error_dist("SGEMV ", &info);
	return 0;
    }

/*     Quick return if possible: nothing to do for an empty matrix or
       when the operation reduces to y := y. */

    if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) {
	return 0;
    }

/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and
       set up the start points in  X  and  Y. A negative increment
       walks the vector backwards, so the first logical element sits
       at the far end of the storage. */

    if (notrans) {
	lenx = *n;
	leny = *m;
    } else {
	lenx = *m;
	leny = *n;
    }
    kx = (*incx > 0) ? 1 : 1 - (lenx - 1) * *incx;
    ky = (*incy > 0) ? 1 : 1 - (leny - 1) * *incy;

/*     Start the operations. In this version the elements of A are
       accessed sequentially with one pass through A.

       First form  y := beta*y. A zero beta stores zeros instead of
       multiplying, so the incoming contents of Y are never read. */

    if (*beta != 1.f) {
	iy = ky;
	if (*incy == 1) {
	    if (*beta == 0.f) {
		for (i = 1; i <= leny; ++i) {
		    Y(i) = 0.f;
		}
	    } else {
		for (i = 1; i <= leny; ++i) {
		    Y(i) = *beta * Y(i);
		}
	    }
	} else if (*beta == 0.f) {
	    for (i = 1; i <= leny; ++i) {
		Y(iy) = 0.f;
		iy += *incy;
	    }
	} else {
	    for (i = 1; i <= leny; ++i) {
		Y(iy) = *beta * Y(iy);
		iy += *incy;
	    }
	}
    }
    if (*alpha == 0.f) {
	return 0;
    }

    if (notrans) {

/*        Form  y := alpha*A*x + y  as a sum of scaled columns of A;
          columns whose x entry is zero are skipped. */

	jx = kx;
	if (*incy == 1) {
	    for (j = 1; j <= *n; ++j) {
		if (X(jx) != 0.f) {
		    temp = *alpha * X(jx);
		    for (i = 1; i <= *m; ++i) {
			Y(i) += temp * A(i,j);
		    }
		}
		jx += *incx;
	    }
	} else {
	    for (j = 1; j <= *n; ++j) {
		if (X(jx) != 0.f) {
		    temp = *alpha * X(jx);
		    iy = ky;
		    for (i = 1; i <= *m; ++i) {
			Y(iy) += temp * A(i,j);
			iy += *incy;
		    }
		}
		jx += *incx;
	    }
	}
    } else {

/*        Form  y := alpha*A'*x + y  as one dot product per column
          of A. */

	jy = ky;
	if (*incx == 1) {
	    for (j = 1; j <= *n; ++j) {
		temp = 0.f;
		for (i = 1; i <= *m; ++i) {
		    temp += A(i,j) * X(i);
		}
		Y(jy) += *alpha * temp;
		jy += *incy;
	    }
	} else {
	    for (j = 1; j <= *n; ++j) {
		temp = 0.f;
		ix = kx;
		for (i = 1; i <= *m; ++i) {
		    temp += A(i,j) * X(ix);
		    ix += *incx;
		}
		Y(jy) += *alpha * temp;
		jy += *incy;
	    }
	}
    }

    return 0;

/*     End of SGEMV . */

} /* sgemv_ */

SuperLU_DIST_5.3.0/CBLAS/dtrsm.c0000644013363400111340000002464213233431301014765 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, 
	integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
	lda, doublereal *b, integer *ldb)
{
/*  Purpose
    =======

    DTRSM  solves one of the matrix equations

       op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,

    where alpha is a scalar, X and B are m by n matrices, A is a unit,
    or non-unit, upper or lower triangular matrix and op( A ) is one of

       op( A ) = A   or   op( A ) = A'.

    The matrix X is overwritten on B.

    No test for singularity of A is made: with DIAG = 'N' the diagonal
    elements of A are used as divisors as they are.

    Parameters
    ==========

    For every CHARACTER*1 argument only the first character is
    examined, and both letter cases are accepted.

    SIDE   - CHARACTER*1.
             Specifies whether op( A ) appears on the left or right
             of X:

                SIDE = 'L' or 'l'   op( A )*X = alpha*B.

                SIDE = 'R' or 'r'   X*op( A ) = alpha*B.

             Unchanged on exit.

    UPLO   - CHARACTER*1.
             Specifies whether the matrix A is an upper or lower
             triangular matrix:

                UPLO = 'U' or 'u'   A is an upper triangular matrix.

                UPLO = 'L' or 'l'   A is a lower triangular matrix.

             Unchanged on exit.

    TRANSA - CHARACTER*1.
             Specifies the form of op( A ):

                TRANSA = 'N' or 'n'   op( A ) = A.

                TRANSA = 'T' or 't'   op( A ) = A'.

                TRANSA = 'C' or 'c'   op( A ) = A'.

             Unchanged on exit.

    DIAG   - CHARACTER*1.
             Specifies whether or not A is unit triangular:

                DIAG = 'U' or 'u'   A is assumed to be unit triangular.

                DIAG = 'N' or 'n'   A is not assumed to be unit
                                    triangular.

             Unchanged on exit.

    M      - INTEGER.
             The number of rows of B. M must be at least zero.
             Unchanged on exit.

    N      - INTEGER.
             The number of columns of B. N must be at least zero.
             Unchanged on exit.

    ALPHA  - DOUBLE PRECISION.
             The scalar alpha. When alpha is zero then A is not
             referenced and B need not be set before entry (B is
             overwritten with zeros).
             Unchanged on exit.

    A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), stored
             column by column, where k is m when SIDE = 'L' or 'l'
             and is n when SIDE = 'R' or 'r'.
             With UPLO = 'U' or 'u' the leading k by k upper triangular
             part must contain the upper triangular matrix; the
             strictly lower triangular part is not referenced.
             With UPLO = 'L' or 'l' the leading k by k lower triangular
             part must contain the lower triangular matrix; the
             strictly upper triangular part is not referenced.
             When DIAG = 'U' or 'u' the diagonal elements of A are not
             referenced either, but are assumed to be unity.
             Unchanged on exit.

    LDA    - INTEGER.
             The first dimension of A as declared in the calling
             (sub) program. When SIDE = 'L' or 'l' LDA must be at
             least max( 1, m ); when SIDE = 'R' or 'r' LDA must be at
             least max( 1, n ).
             Unchanged on exit.

    B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ), stored
             column by column.
             Before entry, the leading m by n part of the array B must
             contain the right-hand side matrix B, and on exit is
             overwritten by the solution matrix X.

    LDB    - INTEGER.
             The first dimension of B as declared in the calling
             (sub) program. LDB must be at least max( 1, m ).
             Unchanged on exit.

    Return value
    ============

    Always 0. When an argument is invalid, input_error_dist is called
    with the routine name "DTRSM " and the 1-based position of the
    first offending argument, and B is left untouched.


    Level 3 Blas routine.

    -- Written on 8-February-1989.
       Jack Dongarra, Argonne National Laboratory.
       Iain Duff, AERE Harwell.
       Jeremy Du Croz, Numerical Algorithms Group Ltd.
       Sven Hammarling, Numerical Algorithms Group Ltd.
*/

    extern /* Subroutine */ int input_error_dist(char *, integer *);

    /* Scratch variables are automatic (f2c emitted them as static) so
       that concurrent calls do not share state. */
    integer info;
    doublereal temp;
    integer i, j, k;
    integer nrowa;
    logical lside, rside;
    logical upper, lower;
    logical notrans, transposed;
    logical nounit, unit;

/* 1-based accessors mirroring the Fortran indexing. */
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

/*     Decode the option flags once; both letter cases are valid, as
       documented. 'T' and 'C' are equivalent for real data. */

    lside = (*side == 'L' || *side == 'l');
    rside = (*side == 'R' || *side == 'r');
    upper = (*uplo == 'U' || *uplo == 'u');
    lower = (*uplo == 'L' || *uplo == 'l');
    notrans = (*transa == 'N' || *transa == 'n');
    transposed = (*transa == 'T' || *transa == 't' ||
		  *transa == 'C' || *transa == 'c');
    nounit = (*diag == 'N' || *diag == 'n');
    unit = (*diag == 'U' || *diag == 'u');

    /* Order of the triangular matrix A. */
    nrowa = lside ? *m : *n;

/*     Test the input parameters. */

    info = 0;
    if (! lside && ! rside) {
	info = 1;
    } else if (! upper && ! lower) {
	info = 2;
    } else if (! notrans && ! transposed) {
	info = 3;
    } else if (! unit && ! nounit) {
	info = 4;
    } else if (*m < 0) {
	info = 5;
    } else if (*n < 0) {
	info = 6;
    } else if (*lda < max(1,nrowa)) {
	info = 9;
    } else if (*ldb < max(1,*m)) {
	info = 11;
    }
    if (info != 0) {
	input_error_dist("DTRSM ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    }

/*     And when  alpha.eq.zero: the solution is the zero matrix and A
       is not referenced. */

    if (*alpha == 0.) {
	for (j = 1; j <= *n; ++j) {
	    for (i = 1; i <= *m; ++i) {
		B(i,j) = 0.;
	    }
	}
	return 0;
    }

/*     Start the operations. */

    if (lside) {
	if (notrans) {

/*           Form  B := alpha*inv( A )*B. */

	    if (upper) {
		/* Back substitution, one column of B at a time. */
		for (j = 1; j <= *n; ++j) {
		    if (*alpha != 1.) {
			for (i = 1; i <= *m; ++i) {
			    B(i,j) = *alpha * B(i,j);
			}
		    }
		    for (k = *m; k >= 1; --k) {
			if (B(k,j) != 0.) {
			    if (nounit) {
				B(k,j) /= A(k,k);
			    }
			    for (i = 1; i < k; ++i) {
				B(i,j) -= B(k,j) * A(i,k);
			    }
			}
		    }
		}
	    } else {
		/* Forward substitution, one column of B at a time. */
		for (j = 1; j <= *n; ++j) {
		    if (*alpha != 1.) {
			for (i = 1; i <= *m; ++i) {
			    B(i,j) = *alpha * B(i,j);
			}
		    }
		    for (k = 1; k <= *m; ++k) {
			if (B(k,j) != 0.) {
			    if (nounit) {
				B(k,j) /= A(k,k);
			    }
			    for (i = k + 1; i <= *m; ++i) {
				B(i,j) -= B(k,j) * A(i,k);
			    }
			}
		    }
		}
	    }
	} else {

/*           Form  B := alpha*inv( A' )*B. */

	    if (upper) {
		/* A' is lower triangular: solve rows top to bottom. */
		for (j = 1; j <= *n; ++j) {
		    for (i = 1; i <= *m; ++i) {
			temp = *alpha * B(i,j);
			for (k = 1; k < i; ++k) {
			    temp -= A(k,i) * B(k,j);
			}
			if (nounit) {
			    temp /= A(i,i);
			}
			B(i,j) = temp;
		    }
		}
	    } else {
		/* A' is upper triangular: solve rows bottom to top. */
		for (j = 1; j <= *n; ++j) {
		    for (i = *m; i >= 1; --i) {
			temp = *alpha * B(i,j);
			for (k = i + 1; k <= *m; ++k) {
			    temp -= A(k,i) * B(k,j);
			}
			if (nounit) {
			    temp /= A(i,i);
			}
			B(i,j) = temp;
		    }
		}
	    }
	}
    } else {
	if (notrans) {

/*           Form  B := alpha*B*inv( A ). */

	    if (upper) {
		/* Column j depends on the already solved columns
		   1..j-1. */
		for (j = 1; j <= *n; ++j) {
		    if (*alpha != 1.) {
			for (i = 1; i <= *m; ++i) {
			    B(i,j) = *alpha * B(i,j);
			}
		    }
		    for (k = 1; k < j; ++k) {
			if (A(k,j) != 0.) {
			    for (i = 1; i <= *m; ++i) {
				B(i,j) -= A(k,j) * B(i,k);
			    }
			}
		    }
		    if (nounit) {
			temp = 1. / A(j,j);
			for (i = 1; i <= *m; ++i) {
			    B(i,j) = temp * B(i,j);
			}
		    }
		}
	    } else {
		/* Column j depends on the already solved columns
		   j+1..n. */
		for (j = *n; j >= 1; --j) {
		    if (*alpha != 1.) {
			for (i = 1; i <= *m; ++i) {
			    B(i,j) = *alpha * B(i,j);
			}
		    }
		    for (k = j + 1; k <= *n; ++k) {
			if (A(k,j) != 0.) {
			    for (i = 1; i <= *m; ++i) {
				B(i,j) -= A(k,j) * B(i,k);
			    }
			}
		    }
		    if (nounit) {
			temp = 1. / A(j,j);
			for (i = 1; i <= *m; ++i) {
			    B(i,j) = temp * B(i,j);
			}
		    }
		}
	    }
	} else {

/*           Form  B := alpha*B*inv( A' ). */

	    if (upper) {
		/* Finish column k (divide by the diagonal), eliminate it
		   from columns 1..k-1, then apply alpha to it. */
		for (k = *n; k >= 1; --k) {
		    if (nounit) {
			temp = 1. / A(k,k);
			for (i = 1; i <= *m; ++i) {
			    B(i,k) = temp * B(i,k);
			}
		    }
		    for (j = 1; j < k; ++j) {
			if (A(j,k) != 0.) {
			    temp = A(j,k);
			    for (i = 1; i <= *m; ++i) {
				B(i,j) -= temp * B(i,k);
			    }
			}
		    }
		    if (*alpha != 1.) {
			for (i = 1; i <= *m; ++i) {
			    B(i,k) = *alpha * B(i,k);
			}
		    }
		}
	    } else {
		/* Same scheme, sweeping k upwards and eliminating from
		   columns k+1..n. */
		for (k = 1; k <= *n; ++k) {
		    if (nounit) {
			temp = 1. / A(k,k);
			for (i = 1; i <= *m; ++i) {
			    B(i,k) = temp * B(i,k);
			}
		    }
		    for (j = k + 1; j <= *n; ++j) {
			if (A(j,k) != 0.) {
			    temp = A(j,k);
			    for (i = 1; i <= *m; ++i) {
				B(i,j) -= temp * B(i,k);
			    }
			}
		    }
		    if (*alpha != 1.) {
			for (i = 1; i <= *m; ++i) {
			    B(i,k) = *alpha * B(i,k);
			}
		    }
		}
	    }
	}
    }

    return 0;

/*     End of DTRSM . */

} /* dtrsm_ */

SuperLU_DIST_5.3.0/CBLAS/dtrsv.c0000644013363400111340000001715013233431301014772 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Return nonzero when the first character of OPT equals UC or its
   lower-case counterpart LC; any later characters are ignored. */
static int dtrsv_opt_is(const char *opt, char uc, char lc)
{
    return opt[0] == uc || opt[0] == lc;
}

/* Subroutine */ int dtrsv_(char *uplo, char *trans, char *diag, integer *n, 
	doublereal *a, integer *lda, doublereal *x, integer *incx)
{

    /* Local variables.  They are automatic (not static) so that the
       routine keeps no state between calls and is re-entrant. */
    integer info;
    doublereal temp;
    integer i, j;
    integer ix, jx, kx;
    extern /* Subroutine */ int input_error_dist(char *, integer *);
    logical upper, notrans, nounit;


/*  Purpose   
    =======   

    DTRSV  solves one of the systems of equations   

       A*x = b,   or   A'*x = b,   

    where b and x are n element vectors and A is an n by n unit, or   
    non-unit, upper or lower triangular matrix.   

    No test for singularity or near-singularity is included in this   
    routine. Such tests must be performed before calling this routine.   

    Parameters   
    ==========   

    UPLO   - CHARACTER*1.   
             = 'U' or 'u'   A is an upper triangular matrix.   
             = 'L' or 'l'   A is a lower triangular matrix.   
             Unchanged on exit.   

    TRANS  - CHARACTER*1.   
             = 'N' or 'n'   solve A*x = b.   
             = 'T' or 't'   solve A'*x = b.   
             = 'C' or 'c'   solve A'*x = b.   
             Unchanged on exit.   

    DIAG   - CHARACTER*1.   
             = 'U' or 'u'   A is assumed to be unit triangular; its   
                            diagonal is not referenced.   
             = 'N' or 'n'   A is not assumed to be unit triangular.   
             Unchanged on exit.   

    N      - INTEGER.   
             The order of the matrix A.  N must be at least zero.   
             Unchanged on exit.   

    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ), stored   
             column by column.  Only the triangle selected by UPLO is   
             referenced; the opposite strict triangle is never read.   
             Unchanged on exit.   

    LDA    - INTEGER.   
             The first dimension of A as declared in the caller.   
             LDA must be at least max( 1, n ).   
             Unchanged on exit.   

    X      - DOUBLE PRECISION array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCX ) ).   
             On entry the incremented array X holds the right-hand side   
             b; on exit it is overwritten with the solution x.   

    INCX   - INTEGER.   
             The increment for the elements of X.  INCX must not be   
             zero.  Unchanged on exit.   

    Return value and errors   
    =======================   

    Always returns 0.  When an argument is invalid, input_error_dist   
    is called with the routine name and the 1-based position of the   
    first offending argument (1, 2, 3, 4, 6 or 8) and nothing is   
    computed.   

    Level 2 Blas routine.   

    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   
*/

/* 1-based (Fortran style) accessors for the vector and the
   column-major matrix. */
#define X(I) x[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

/*     Test the input parameters.  Only the first character of each   
       option string is examined, in either case. */

    info = 0;
    if (!dtrsv_opt_is(uplo, 'U', 'u') && !dtrsv_opt_is(uplo, 'L', 'l')) {
	info = 1;
    } else if (!dtrsv_opt_is(trans, 'N', 'n') &&
	       !dtrsv_opt_is(trans, 'T', 't') &&
	       !dtrsv_opt_is(trans, 'C', 'c')) {
	info = 2;
    } else if (!dtrsv_opt_is(diag, 'U', 'u') &&
	       !dtrsv_opt_is(diag, 'N', 'n')) {
	info = 3;
    } else if (*n < 0) {
	info = 4;
    } else if (*lda < max(1,*n)) {
	info = 6;
    } else if (*incx == 0) {
	info = 8;
    }
    if (info != 0) {
	input_error_dist("DTRSV ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    }

    upper = dtrsv_opt_is(uplo, 'U', 'u');
    notrans = dtrsv_opt_is(trans, 'N', 'n');
    nounit = dtrsv_opt_is(diag, 'N', 'n');

/*     Set up the start point in X (1-based index of the first logical   
       element).  For a negative increment the first logical element   
       sits at the far end of the array.  Descending loops below start   
       from kx + (N-1)*INCX instead. */

    if (*incx <= 0) {
	kx = 1 - (*n - 1) * *incx;
    } else {
	kx = 1;
    }

/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A. */

    if (notrans) {

/*        Form  x := inv( A )*x.  Column-oriented substitution: once   
          x(j) is final, its contribution is removed from the remaining   
          entries.  Columns with x(j) == 0 contribute nothing and are   
          skipped. */

	if (upper) {
	    if (*incx == 1) {
		for (j = *n; j >= 1; --j) {
		    if (X(j) != 0.) {
			if (nounit) {
			    X(j) /= A(j,j);
			}
			temp = X(j);
			for (i = j - 1; i >= 1; --i) {
			    X(i) -= temp * A(i,j);
			}
		    }
		}
	    } else {
		jx = kx + (*n - 1) * *incx;
		for (j = *n; j >= 1; --j) {
		    if (X(jx) != 0.) {
			if (nounit) {
			    X(jx) /= A(j,j);
			}
			temp = X(jx);
			ix = jx;
			for (i = j - 1; i >= 1; --i) {
			    ix -= *incx;
			    X(ix) -= temp * A(i,j);
			}
		    }
		    jx -= *incx;
		}
	    }
	} else {
	    if (*incx == 1) {
		for (j = 1; j <= *n; ++j) {
		    if (X(j) != 0.) {
			if (nounit) {
			    X(j) /= A(j,j);
			}
			temp = X(j);
			for (i = j + 1; i <= *n; ++i) {
			    X(i) -= temp * A(i,j);
			}
		    }
		}
	    } else {
		jx = kx;
		for (j = 1; j <= *n; ++j) {
		    if (X(jx) != 0.) {
			if (nounit) {
			    X(jx) /= A(j,j);
			}
			temp = X(jx);
			ix = jx;
			for (i = j + 1; i <= *n; ++i) {
			    ix += *incx;
			    X(ix) -= temp * A(i,j);
			}
		    }
		    jx += *incx;
		}
	    }
	}
    } else {

/*        Form  x := inv( A' )*x.  Dot-product form: x(j) is obtained   
          from column j of A and the entries of x already solved. */

	if (upper) {
	    if (*incx == 1) {
		for (j = 1; j <= *n; ++j) {
		    temp = X(j);
		    for (i = 1; i <= j-1; ++i) {
			temp -= A(i,j) * X(i);
		    }
		    if (nounit) {
			temp /= A(j,j);
		    }
		    X(j) = temp;
		}
	    } else {
		jx = kx;
		for (j = 1; j <= *n; ++j) {
		    temp = X(jx);
		    ix = kx;
		    for (i = 1; i <= j-1; ++i) {
			temp -= A(i,j) * X(ix);
			ix += *incx;
		    }
		    if (nounit) {
			temp /= A(j,j);
		    }
		    X(jx) = temp;
		    jx += *incx;
		}
	    }
	} else {
	    if (*incx == 1) {
		for (j = *n; j >= 1; --j) {
		    temp = X(j);
		    for (i = *n; i >= j+1; --i) {
			temp -= A(i,j) * X(i);
		    }
		    if (nounit) {
			temp /= A(j,j);
		    }
		    X(j) = temp;
		}
	    } else {
		/* Descending sweep: move the start point to the last
		   logical element of x. */
		kx += (*n - 1) * *incx;
		jx = kx;
		for (j = *n; j >= 1; --j) {
		    temp = X(jx);
		    ix = kx;
		    for (i = *n; i >= j+1; --i) {
			temp -= A(i,j) * X(ix);
			ix -= *incx;
		    }
		    if (nounit) {
			temp /= A(j,j);
		    }
		    X(jx) = temp;
		    jx -= *incx;
		}
	    }
	}
    }

    return 0;

/*     End of DTRSV . */

} /* dtrsv_ */

SuperLU_DIST_5.3.0/CBLAS/zgemm.c0000644013363400111340000004523313233431301014752 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Subroutine */ int zgemm_(char *transa, char *transb, integer *m, integer *
	n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, 
	doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex *c,
	 integer *ldc)
{


    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
	    i__3, i__4, i__5, i__6;
    doublecomplex z__1, z__2, z__3, z__4;

    /* Builtin functions */
    void d_cnjg(doublecomplex *, doublecomplex *);

    /* Local variables */
    static integer info;
    static logical nota, notb;
    static doublecomplex temp;
    static integer i, j, l;
    static logical conja, conjb;
    static integer ncola;
    static integer nrowa, nrowb;
    extern /* Subroutine */ int input_error_dist(char *, integer *);


/*  Purpose   
    =======   

    ZGEMM  performs one of the matrix-matrix operations   

       C := alpha*op( A )*op( B ) + beta*C,   

    where  op( X ) is one of   

       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),   

    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
  
    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
  

    Parameters   
    ==========   

    TRANSA - CHARACTER*1.   
             On entry, TRANSA specifies the form of op( A ) to be used in 
  
             the matrix multiplication as follows:   

                TRANSA = 'N' or 'n',  op( A ) = A.   

                TRANSA = 'T' or 't',  op( A ) = A'.   

                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   

             Unchanged on exit.   

    TRANSB - CHARACTER*1.   
             On entry, TRANSB specifies the form of op( B ) to be used in 
  
             the matrix multiplication as follows:   

                TRANSB = 'N' or 'n',  op( B ) = B.   

                TRANSB = 'T' or 't',  op( B ) = B'.   

                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   

             Unchanged on exit.   

    M      - INTEGER.   
             On entry,  M  specifies  the number  of rows  of the  matrix 
  
             op( A )  and of the  matrix  C.  M  must  be at least  zero. 
  
             Unchanged on exit.   

    N      - INTEGER.   
             On entry,  N  specifies the number  of columns of the matrix 
  
             op( B ) and the number of columns of the matrix C. N must be 
  
             at least zero.   
             Unchanged on exit.   

    K      - INTEGER.   
             On entry,  K  specifies  the number of columns of the matrix 
  
             op( A ) and the number of rows of the matrix op( B ). K must 
  
             be at least  zero.   
             Unchanged on exit.   

    ALPHA  - COMPLEX*16      .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is 
  
             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.   
             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k 
  
             part of the array  A  must contain the matrix  A,  otherwise 
  
             the leading  k by m  part of the array  A  must contain  the 
  
             matrix A.   
             Unchanged on exit.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared 
  
             in the calling (sub) program. When  TRANSA = 'N' or 'n' then 
  
             LDA must be at least  max( 1, m ), otherwise  LDA must be at 
  
             least  max( 1, k ).   
             Unchanged on exit.   

    B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is 
  
             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.   
             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n 
  
             part of the array  B  must contain the matrix  B,  otherwise 
  
             the leading  n by k  part of the array  B  must contain  the 
  
             matrix B.   
             Unchanged on exit.   

    LDB    - INTEGER.   
             On entry, LDB specifies the first dimension of B as declared 
  
             in the calling (sub) program. When  TRANSB = 'N' or 'n' then 
  
             LDB must be at least  max( 1, k ), otherwise  LDB must be at 
  
             least  max( 1, n ).   
             Unchanged on exit.   

    BETA   - COMPLEX*16      .   
             On entry,  BETA  specifies the scalar  beta.  When  BETA  is 
  
             supplied as zero then C need not be set on input.   
             Unchanged on exit.   

    C      - COMPLEX*16       array of DIMENSION ( LDC, n ).   
             Before entry, the leading  m by n  part of the array  C must 
  
             contain the matrix  C,  except when  beta  is zero, in which 
  
             case C need not be set on entry.   
             On exit, the array  C  is overwritten by the  m by n  matrix 
  
             ( alpha*op( A )*op( B ) + beta*C ).   

    LDC    - INTEGER.   
             On entry, LDC specifies the first dimension of C as declared 
  
             in  the  calling  (sub)  program.   LDC  must  be  at  least 
  
             max( 1, m ).   
             Unchanged on exit.   


    Level 3 Blas routine.   

    -- Written on 8-February-1989.   
       Jack Dongarra, Argonne National Laboratory.   
       Iain Duff, AERE Harwell.   
       Jeremy Du Croz, Numerical Algorithms Group Ltd.   
       Sven Hammarling, Numerical Algorithms Group Ltd.   

       Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not 
       conjugated or transposed, set  CONJA and CONJB  as true if  A  and 
       B  respectively are to be  transposed but  not conjugated  and set 
       NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A 
       and the number of rows of  B  respectively.   
    
   Parameter adjustments   
       Function Body */

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]
#define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)]

    nota = (strncmp(transa, "N", 1)==0);
    notb = (strncmp(transb, "N", 1)==0);
    conja = (strncmp(transa, "C", 1)==0);
    conjb = (strncmp(transb, "C", 1)==0);
    if (nota) {
	nrowa = *m;
	ncola = *k;
    } else {
	nrowa = *k;
	ncola = *m;
    }
    if (notb) {
	nrowb = *k;
    } else {
	nrowb = *n;
    }

/*     Test the input parameters. */

    info = 0;
    if (! nota && ! conja && strncmp(transa, "T", 1)!=0) {
	info = 1;
    } else if (! notb && ! conjb && strncmp(transb, "T", 1)!=0) {
	info = 2;
    } else if (*m < 0) {
	info = 3;
    } else if (*n < 0) {
	info = 4;
    } else if (*k < 0) {
	info = 5;
    } else if (*lda < max(1,nrowa)) {
	info = 8;
    } else if (*ldb < max(1,nrowb)) {
	info = 10;
    } else if (*ldc < max(1,*m)) {
	info = 13;
    }
    if (info != 0) {
	input_error_dist("ZGEMM ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) &&
	     (beta->r == 1. && beta->i == 0.)) {
	return 0;
    }

/*     And when  alpha.eq.zero. */

    if (alpha->r == 0. && alpha->i == 0.) {
	if (beta->r == 0. && beta->i == 0.) {
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    i__3 = i + j * c_dim1;
		    C(i,j).r = 0., C(i,j).i = 0.;
/* L10: */
		}
/* L20: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    i__3 = i + j * c_dim1;
		    i__4 = i + j * c_dim1;
		    z__1.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
			    z__1.i = beta->r * C(i,j).i + beta->i * C(i,j)
			    .r;
		    C(i,j).r = z__1.r, C(i,j).i = z__1.i;
/* L30: */
		}
/* L40: */
	    }
	}
	return 0;
    }

/*     Start the operations. */

    if (notb) {
	if (nota) {

/*           Form  C := alpha*A*B + beta*C. */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		if (beta->r == 0. && beta->i == 0.) {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i + j * c_dim1;
			C(i,j).r = 0., C(i,j).i = 0.;
/* L50: */
		    }
		} else if (beta->r != 1. || beta->i != 0.) {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i + j * c_dim1;
			i__4 = i + j * c_dim1;
			z__1.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__1.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
/* L60: */
		    }
		}
		i__2 = *k;
		for (l = 1; l <= *k; ++l) {
		    i__3 = l + j * b_dim1;
		    if (B(l,j).r != 0. || B(l,j).i != 0.) {
			i__3 = l + j * b_dim1;
			z__1.r = alpha->r * B(l,j).r - alpha->i * B(l,j).i, 
				z__1.i = alpha->r * B(l,j).i + alpha->i * B(l,j).r;
			temp.r = z__1.r, temp.i = z__1.i;
			i__3 = *m;
			for (i = 1; i <= *m; ++i) {
			    i__4 = i + j * c_dim1;
			    i__5 = i + j * c_dim1;
			    i__6 = i + l * a_dim1;
			    z__2.r = temp.r * A(i,l).r - temp.i * A(i,l).i, 
				    z__2.i = temp.r * A(i,l).i + temp.i * A(i,l).r;
			    z__1.r = C(i,j).r + z__2.r, z__1.i = C(i,j).i + 
				    z__2.i;
			    C(i,j).r = z__1.r, C(i,j).i = z__1.i;
/* L70: */
			}
		    }
/* L80: */
		}
/* L90: */
	    }
	} else if (conja) {

/*           Form  C := alpha*conjg( A' )*B + beta*C. */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0., temp.i = 0.;
		    i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			d_cnjg(&z__3, &A(l,i));
			i__4 = l + j * b_dim1;
			z__2.r = z__3.r * B(l,j).r - z__3.i * B(l,j).i, 
				z__2.i = z__3.r * B(l,j).i + z__3.i * B(l,j)
				.r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
/* L100: */
		    }
		    if (beta->r == 0. && beta->i == 0.) {
			i__3 = i + j * c_dim1;
			z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    } else {
			i__3 = i + j * c_dim1;
			z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			i__4 = i + j * c_dim1;
			z__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    }
/* L110: */
		}
/* L120: */
	    }
	} else {

/*           Form  C := alpha*A'*B + beta*C */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0., temp.i = 0.;
		    i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			i__4 = l + i * a_dim1;
			i__5 = l + j * b_dim1;
			z__2.r = A(l,i).r * B(l,j).r - A(l,i).i * B(l,j)
				.i, z__2.i = A(l,i).r * B(l,j).i + A(l,i)
				.i * B(l,j).r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
/* L130: */
		    }
		    if (beta->r == 0. && beta->i == 0.) {
			i__3 = i + j * c_dim1;
			z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    } else {
			i__3 = i + j * c_dim1;
			z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			i__4 = i + j * c_dim1;
			z__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    }
/* L140: */
		}
/* L150: */
	    }
	}
    } else if (nota) {
	if (conjb) {

/*           Form  C := alpha*A*conjg( B' ) + beta*C. */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		if (beta->r == 0. && beta->i == 0.) {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i + j * c_dim1;
			C(i,j).r = 0., C(i,j).i = 0.;
/* L160: */
		    }
		} else if (beta->r != 1. || beta->i != 0.) {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i + j * c_dim1;
			i__4 = i + j * c_dim1;
			z__1.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__1.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
/* L170: */
		    }
		}
		i__2 = *k;
		for (l = 1; l <= *k; ++l) {
		    i__3 = j + l * b_dim1;
		    if (B(j,l).r != 0. || B(j,l).i != 0.) {
			d_cnjg(&z__2, &B(j,l));
			z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, 
				z__1.i = alpha->r * z__2.i + alpha->i * 
				z__2.r;
			temp.r = z__1.r, temp.i = z__1.i;
			i__3 = *m;
			for (i = 1; i <= *m; ++i) {
			    i__4 = i + j * c_dim1;
			    i__5 = i + j * c_dim1;
			    i__6 = i + l * a_dim1;
			    z__2.r = temp.r * A(i,l).r - temp.i * A(i,l).i, 
				    z__2.i = temp.r * A(i,l).i + temp.i * A(i,l).r;
			    z__1.r = C(i,j).r + z__2.r, z__1.i = C(i,j).i + 
				    z__2.i;
			    C(i,j).r = z__1.r, C(i,j).i = z__1.i;
/* L180: */
			}
		    }
/* L190: */
		}
/* L200: */
	    }
	} else {

/*           Form  C := alpha*A*B'          + beta*C */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		if (beta->r == 0. && beta->i == 0.) {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i + j * c_dim1;
			C(i,j).r = 0., C(i,j).i = 0.;
/* L210: */
		    }
		} else if (beta->r != 1. || beta->i != 0.) {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i + j * c_dim1;
			i__4 = i + j * c_dim1;
			z__1.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__1.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
/* L220: */
		    }
		}
		i__2 = *k;
		for (l = 1; l <= *k; ++l) {
		    i__3 = j + l * b_dim1;
		    if (B(j,l).r != 0. || B(j,l).i != 0.) {
			i__3 = j + l * b_dim1;
			z__1.r = alpha->r * B(j,l).r - alpha->i * B(j,l).i, 
				z__1.i = alpha->r * B(j,l).i + alpha->i * B(j,l).r;
			temp.r = z__1.r, temp.i = z__1.i;
			i__3 = *m;
			for (i = 1; i <= *m; ++i) {
			    i__4 = i + j * c_dim1;
			    i__5 = i + j * c_dim1;
			    i__6 = i + l * a_dim1;
			    z__2.r = temp.r * A(i,l).r - temp.i * A(i,l).i, 
				    z__2.i = temp.r * A(i,l).i + temp.i * A(i,l).r;
			    z__1.r = C(i,j).r + z__2.r, z__1.i = C(i,j).i + 
				    z__2.i;
			    C(i,j).r = z__1.r, C(i,j).i = z__1.i;
/* L230: */
			}
		    }
/* L240: */
		}
/* L250: */
	    }
	}
    } else if (conja) {
	if (conjb) {

/*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C. */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0., temp.i = 0.;
		    i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			d_cnjg(&z__3, &A(l,i));
			d_cnjg(&z__4, &B(j,l));
			z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = 
				z__3.r * z__4.i + z__3.i * z__4.r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
/* L260: */
		    }
		    if (beta->r == 0. && beta->i == 0.) {
			i__3 = i + j * c_dim1;
			z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    } else {
			i__3 = i + j * c_dim1;
			z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			i__4 = i + j * c_dim1;
			z__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    }
/* L270: */
		}
/* L280: */
	    }
	} else {

/*           Form  C := alpha*conjg( A' )*B' + beta*C */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0., temp.i = 0.;
		    i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			d_cnjg(&z__3, &A(l,i));
			i__4 = j + l * b_dim1;
			z__2.r = z__3.r * B(j,l).r - z__3.i * B(j,l).i, 
				z__2.i = z__3.r * B(j,l).i + z__3.i * B(j,l)
				.r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
/* L290: */
		    }
		    if (beta->r == 0. && beta->i == 0.) {
			i__3 = i + j * c_dim1;
			z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    } else {
			i__3 = i + j * c_dim1;
			z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			i__4 = i + j * c_dim1;
			z__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    }
/* L300: */
		}
/* L310: */
	    }
	}
    } else {
	if (conjb) {

/*           Form  C := alpha*A'*conjg( B' ) + beta*C */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0., temp.i = 0.;
		    i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			i__4 = l + i * a_dim1;
			d_cnjg(&z__3, &B(j,l));
			z__2.r = A(l,i).r * z__3.r - A(l,i).i * z__3.i, 
				z__2.i = A(l,i).r * z__3.i + A(l,i).i * 
				z__3.r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
/* L320: */
		    }
		    if (beta->r == 0. && beta->i == 0.) {
			i__3 = i + j * c_dim1;
			z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    } else {
			i__3 = i + j * c_dim1;
			z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			i__4 = i + j * c_dim1;
			z__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    }
/* L330: */
		}
/* L340: */
	    }
	} else {

/*           Form  C := alpha*A'*B' + beta*C */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0., temp.i = 0.;
		    i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			i__4 = l + i * a_dim1;
			i__5 = j + l * b_dim1;
			z__2.r = A(l,i).r * B(j,l).r - A(l,i).i * B(j,l)
				.i, z__2.i = A(l,i).r * B(j,l).i + A(l,i)
				.i * B(j,l).r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
/* L350: */
		    }
		    if (beta->r == 0. && beta->i == 0.) {
			i__3 = i + j * c_dim1;
			z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    } else {
			i__3 = i + j * c_dim1;
			z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				z__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			i__4 = i + j * c_dim1;
			z__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				z__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
			C(i,j).r = z__1.r, C(i,j).i = z__1.i;
		    }
/* L360: */
		}
/* L370: */
	    }
	}
    }

    return 0;

/*     End of ZGEMM . */

} /* zgemm_ */

SuperLU_DIST_5.3.0/CBLAS/zgemv.c0000644013363400111340000002364713233431301014770 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include 
#include "f2c.h"

/* Subroutine */ int zgemv_(char *trans, integer *m, integer *n, 
	doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
	x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *
	incy)
{


    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    doublecomplex z__1, z__2, z__3;

    /* Builtin functions */
    void d_cnjg(doublecomplex *, doublecomplex *);

    /* Local variables */
    static integer info;
    static doublecomplex temp;
    static integer lenx, leny, i, j;
    static integer ix, iy, jx, jy, kx, ky;
    extern /* Subroutine */ int input_error_dist(char *, integer *);
    static logical noconj;


/*  Purpose   
    =======   

    ZGEMV  performs one of the matrix-vector operations   

       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or   

       y := alpha*conjg( A' )*x + beta*y,   

    where alpha and beta are scalars, x and y are vectors and A is an   
    m by n matrix.   

    Parameters   
    ==========   

    TRANS  - CHARACTER*1.   
             On entry, TRANS specifies the operation to be performed as   
             follows:   

                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   

                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   

                TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.   

             Unchanged on exit.   

    M      - INTEGER.   
             On entry, M specifies the number of rows of the matrix A.   
             M must be at least zero.   
             Unchanged on exit.   

    N      - INTEGER.   
             On entry, N specifies the number of columns of the matrix A. 
  
             N must be at least zero.   
             Unchanged on exit.   

    ALPHA  - COMPLEX*16      .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).   
             Before entry, the leading m by n part of the array A must   
             contain the matrix of coefficients.   
             Unchanged on exit.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared 
  
             in the calling (sub) program. LDA must be at least   
             max( 1, m ).   
             Unchanged on exit.   

    X      - COMPLEX*16       array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   
             Unchanged on exit.   

    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   

    BETA   - COMPLEX*16      .   
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   
             Unchanged on exit.   

    Y      - COMPLEX*16       array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the 
  
             updated vector y.   

    INCY   - INTEGER.   
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   
             Unchanged on exit.   


    Level 2 Blas routine.   

    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   



       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    info = 0;
    if (strncmp(trans, "N", 1)!=0 && strncmp(trans, "T", 1)!=0 &&
	strncmp(trans, "C", 1)!=0) {
	info = 1;
    } else if (*m < 0) {
	info = 2;
    } else if (*n < 0) {
	info = 3;
    } else if (*lda < max(1,*m)) {
	info = 6;
    } else if (*incx == 0) {
	info = 8;
    } else if (*incy == 0) {
	info = 11;
    }
    if (info != 0) {
	input_error_dist("ZGEMV ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
	    1. && beta->i == 0.)) {
	return 0;
    }

    noconj = (strncmp(trans, "T", 1)==0);

/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
  
       up the start points in  X  and  Y. */

    if (strncmp(trans, "N", 1)==0) {
	lenx = *n;
	leny = *m;
    } else {
	lenx = *m;
	leny = *n;
    }
    if (*incx > 0) {
	kx = 1;
    } else {
	kx = 1 - (lenx - 1) * *incx;
    }
    if (*incy > 0) {
	ky = 1;
    } else {
	ky = 1 - (leny - 1) * *incy;
    }

/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A.   

       First form  y := beta*y. */

    if (beta->r != 1. || beta->i != 0.) {
	if (*incy == 1) {
	    if (beta->r == 0. && beta->i == 0.) {
		i__1 = leny;
		for (i = 1; i <= leny; ++i) {
		    i__2 = i;
		    Y(i).r = 0., Y(i).i = 0.;
/* L10: */
		}
	    } else {
		i__1 = leny;
		for (i = 1; i <= leny; ++i) {
		    i__2 = i;
		    i__3 = i;
		    z__1.r = beta->r * Y(i).r - beta->i * Y(i).i, 
			    z__1.i = beta->r * Y(i).i + beta->i * Y(i)
			    .r;
		    Y(i).r = z__1.r, Y(i).i = z__1.i;
/* L20: */
		}
	    }
	} else {
	    iy = ky;
	    if (beta->r == 0. && beta->i == 0.) {
		i__1 = leny;
		for (i = 1; i <= leny; ++i) {
		    i__2 = iy;
		    Y(iy).r = 0., Y(iy).i = 0.;
		    iy += *incy;
/* L30: */
		}
	    } else {
		i__1 = leny;
		for (i = 1; i <= leny; ++i) {
		    i__2 = iy;
		    i__3 = iy;
		    z__1.r = beta->r * Y(iy).r - beta->i * Y(iy).i, 
			    z__1.i = beta->r * Y(iy).i + beta->i * Y(iy)
			    .r;
		    Y(iy).r = z__1.r, Y(iy).i = z__1.i;
		    iy += *incy;
/* L40: */
		}
	    }
	}
    }
    if (alpha->r == 0. && alpha->i == 0.) {
	return 0;
    }
    if (strncmp(trans, "N", 1)==0) {

/*        Form  y := alpha*A*x + y. */

	jx = kx;
	if (*incy == 1) {
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = jx;
		if (X(jx).r != 0. || X(jx).i != 0.) {
		    i__2 = jx;
		    z__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, 
			    z__1.i = alpha->r * X(jx).i + alpha->i * X(jx)
			    .r;
		    temp.r = z__1.r, temp.i = z__1.i;
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i;
			i__4 = i;
			i__5 = i + j * a_dim1;
			z__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, 
				z__2.i = temp.r * A(i,j).i + temp.i * A(i,j)
				.r;
			z__1.r = Y(i).r + z__2.r, z__1.i = Y(i).i + 
				z__2.i;
			Y(i).r = z__1.r, Y(i).i = z__1.i;
/* L50: */
		    }
		}
		jx += *incx;
/* L60: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = jx;
		if (X(jx).r != 0. || X(jx).i != 0.) {
		    i__2 = jx;
		    z__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, 
			    z__1.i = alpha->r * X(jx).i + alpha->i * X(jx)
			    .r;
		    temp.r = z__1.r, temp.i = z__1.i;
		    iy = ky;
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = iy;
			i__4 = iy;
			i__5 = i + j * a_dim1;
			z__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, 
				z__2.i = temp.r * A(i,j).i + temp.i * A(i,j)
				.r;
			z__1.r = Y(iy).r + z__2.r, z__1.i = Y(iy).i + 
				z__2.i;
			Y(iy).r = z__1.r, Y(iy).i = z__1.i;
			iy += *incy;
/* L70: */
		    }
		}
		jx += *incx;
/* L80: */
	    }
	}
    } else {

/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
 */

	jy = ky;
	if (*incx == 1) {
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		temp.r = 0., temp.i = 0.;
		if (noconj) {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i + j * a_dim1;
			i__4 = i;
			z__2.r = A(i,j).r * X(i).r - A(i,j).i * X(i)
				.i, z__2.i = A(i,j).r * X(i).i + A(i,j)
				.i * X(i).r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
/* L90: */
		    }
		} else {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			d_cnjg(&z__3, &A(i,j));
			i__3 = i;
			z__2.r = z__3.r * X(i).r - z__3.i * X(i).i, 
				z__2.i = z__3.r * X(i).i + z__3.i * X(i)
				.r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
/* L100: */
		    }
		}
		i__2 = jy;
		i__3 = jy;
		z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
			alpha->r * temp.i + alpha->i * temp.r;
		z__1.r = Y(jy).r + z__2.r, z__1.i = Y(jy).i + z__2.i;
		Y(jy).r = z__1.r, Y(jy).i = z__1.i;
		jy += *incy;
/* L110: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		temp.r = 0., temp.i = 0.;
		ix = kx;
		if (noconj) {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			i__3 = i + j * a_dim1;
			i__4 = ix;
			z__2.r = A(i,j).r * X(ix).r - A(i,j).i * X(ix)
				.i, z__2.i = A(i,j).r * X(ix).i + A(i,j)
				.i * X(ix).r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
			ix += *incx;
/* L120: */
		    }
		} else {
		    i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			d_cnjg(&z__3, &A(i,j));
			i__3 = ix;
			z__2.r = z__3.r * X(ix).r - z__3.i * X(ix).i, 
				z__2.i = z__3.r * X(ix).i + z__3.i * X(ix)
				.r;
			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
			temp.r = z__1.r, temp.i = z__1.i;
			ix += *incx;
/* L130: */
		    }
		}
		i__2 = jy;
		i__3 = jy;
		z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
			alpha->r * temp.i + alpha->i * temp.r;
		z__1.r = Y(jy).r + z__2.r, z__1.i = Y(jy).i + z__2.i;
		Y(jy).r = z__1.r, Y(jy).i = z__1.i;
		jy += *incy;
/* L140: */
	    }
	}
    }

    return 0;

/*     End of ZGEMV . */

} /* zgemv_ */

SuperLU_DIST_5.3.0/CBLAS/zgerc.c0000644013363400111340000001242113233431301014736 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int zgerc_(integer *m, integer *n, doublecomplex *alpha, 
	doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
	doublecomplex *a, integer *lda)
{
    /* Builtin functions */
    void d_cnjg(doublecomplex *, doublecomplex *);

    /* Local variables.  All of them are automatic: the routine keeps no
       state between calls. */
    integer info;
    doublecomplex ycj, temp, prod;
    integer i, j, ix, jy, kx;
    extern /* Subroutine */ int input_error_dist(char *, integer *);


/*  Purpose
    =======

    ZGERC  performs the rank 1 operation

       A := alpha*x*conjg( y' ) + A,

    where alpha is a scalar, x is an m element vector, y is an n element
    vector and A is an m by n matrix.

    Parameters
    ==========

    M      - INTEGER.
             Number of rows of the matrix A.  M must be at least zero.
             Unchanged on exit.

    N      - INTEGER.
             Number of columns of the matrix A.  N must be at least zero.
             Unchanged on exit.

    ALPHA  - COMPLEX*16.
             The scalar alpha.  Unchanged on exit.

    X      - COMPLEX*16 array of dimension at least
             ( 1 + ( m - 1 )*abs( INCX ) ).
             Before entry, the incremented array X must contain the m
             element vector x.  Unchanged on exit.

    INCX   - INTEGER.
             Increment for the elements of X.  INCX must not be zero.
             Unchanged on exit.

    Y      - COMPLEX*16 array of dimension at least
             ( 1 + ( n - 1 )*abs( INCY ) ).
             Before entry, the incremented array Y must contain the n
             element vector y.  Unchanged on exit.

    INCY   - INTEGER.
             Increment for the elements of Y.  INCY must not be zero.
             Unchanged on exit.

    A      - COMPLEX*16 array of DIMENSION ( LDA, n ).
             Before entry, the leading m by n part of the array A must
             contain the matrix of coefficients.  On exit, A is
             overwritten by the updated matrix.

    LDA    - INTEGER.
             First dimension of A as declared in the calling (sub)
             program.  LDA must be at least max( 1, m ).
             Unchanged on exit.

    Return value
    ============

    Always 0.  An invalid argument is reported through
    input_error_dist("ZGERC ", &info), where info is the 1-based position
    of the offending argument (1 = M, 2 = N, 5 = INCX, 7 = INCY, 9 = LDA),
    and the routine then returns without touching A.


    Level 2 Blas routine.

    -- Written on 22-October-1986.
       Jack Dongarra, Argonne National Lab.
       Jeremy Du Croz, Nag Central Office.
       Sven Hammarling, Nag Central Office.
       Richard Hanson, Sandia National Labs.
*/

/* 1-based (Fortran style) accessors; A is stored column by column with
   leading dimension *lda. */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

/*     Test the input parameters. */

    info = 0;
    if (*m < 0) {
        info = 1;
    } else if (*n < 0) {
        info = 2;
    } else if (*incx == 0) {
        info = 5;
    } else if (*incy == 0) {
        info = 7;
    } else if (*lda < max(1,*m)) {
        info = 9;
    }
    if (info != 0) {
        input_error_dist("ZGERC ", &info);
        return 0;
    }

/*     Quick return if possible: empty matrix or alpha == 0. */

    if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0.)) {
        return 0;
    }

/*     Start the operations. In this version the elements of A are
       accessed sequentially with one pass through A. */

    /* With a negative increment the vector is traversed backwards, so the
       first logical element lives at the far end of the storage. */
    if (*incy > 0) {
        jy = 1;
    } else {
        jy = 1 - (*n - 1) * *incy;
    }

    if (*incx == 1) {

        /* Unit stride in x: index x directly by the row number. */
        for (j = 1; j <= *n; ++j) {
            /* A zero y(j) contributes nothing to column j. */
            if (Y(jy).r != 0. || Y(jy).i != 0.) {
                /* temp = alpha * conjg( y(j) ) */
                d_cnjg(&ycj, &Y(jy));
                temp.r = alpha->r * ycj.r - alpha->i * ycj.i;
                temp.i = alpha->r * ycj.i + alpha->i * ycj.r;
                for (i = 1; i <= *m; ++i) {
                    /* A(i,j) += x(i) * temp */
                    prod.r = X(i).r * temp.r - X(i).i * temp.i;
                    prod.i = X(i).r * temp.i + X(i).i * temp.r;
                    A(i,j).r += prod.r;
                    A(i,j).i += prod.i;
                }
            }
            jy += *incy;
        }

    } else {

        /* General stride in x: walk x with its own running index. */
        if (*incx > 0) {
            kx = 1;
        } else {
            kx = 1 - (*m - 1) * *incx;
        }
        for (j = 1; j <= *n; ++j) {
            if (Y(jy).r != 0. || Y(jy).i != 0.) {
                /* temp = alpha * conjg( y(j) ) */
                d_cnjg(&ycj, &Y(jy));
                temp.r = alpha->r * ycj.r - alpha->i * ycj.i;
                temp.i = alpha->r * ycj.i + alpha->i * ycj.r;
                ix = kx;
                for (i = 1; i <= *m; ++i) {
                    /* A(i,j) += x(i) * temp */
                    prod.r = X(ix).r * temp.r - X(ix).i * temp.i;
                    prod.i = X(ix).r * temp.i + X(ix).i * temp.r;
                    A(i,j).r += prod.r;
                    A(i,j).i += prod.i;
                    ix += *incx;
                }
            }
            jy += *incy;
        }
    }

    return 0;

/*     End of ZGERC . */

} /* zgerc_ */

SuperLU_DIST_5.3.0/CBLAS/zgeru.c0000644013363400111340000001225013233431301014760 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Subroutine */ int zgeru_(integer *m, integer *n, doublecomplex *alpha, 
	doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
	doublecomplex *a, integer *lda)
{
    /* Local variables.  All of them are automatic: the routine keeps no
       state between calls. */
    integer info;
    doublecomplex temp, prod;
    integer i, j, ix, jy, kx;
    extern /* Subroutine */ int input_error_dist(char *, integer *);


/*  Purpose
    =======

    ZGERU  performs the rank 1 operation

       A := alpha*x*y' + A,

    where alpha is a scalar, x is an m element vector, y is an n element
    vector and A is an m by n matrix.  Unlike ZGERC, y is used as is
    (not conjugated).

    Parameters
    ==========

    M      - INTEGER.
             Number of rows of the matrix A.  M must be at least zero.
             Unchanged on exit.

    N      - INTEGER.
             Number of columns of the matrix A.  N must be at least zero.
             Unchanged on exit.

    ALPHA  - COMPLEX*16.
             The scalar alpha.  Unchanged on exit.

    X      - COMPLEX*16 array of dimension at least
             ( 1 + ( m - 1 )*abs( INCX ) ).
             Before entry, the incremented array X must contain the m
             element vector x.  Unchanged on exit.

    INCX   - INTEGER.
             Increment for the elements of X.  INCX must not be zero.
             Unchanged on exit.

    Y      - COMPLEX*16 array of dimension at least
             ( 1 + ( n - 1 )*abs( INCY ) ).
             Before entry, the incremented array Y must contain the n
             element vector y.  Unchanged on exit.

    INCY   - INTEGER.
             Increment for the elements of Y.  INCY must not be zero.
             Unchanged on exit.

    A      - COMPLEX*16 array of DIMENSION ( LDA, n ).
             Before entry, the leading m by n part of the array A must
             contain the matrix of coefficients.  On exit, A is
             overwritten by the updated matrix.

    LDA    - INTEGER.
             First dimension of A as declared in the calling (sub)
             program.  LDA must be at least max( 1, m ).
             Unchanged on exit.

    Return value
    ============

    Always 0.  An invalid argument is reported through
    input_error_dist("ZGERU ", &info), where info is the 1-based position
    of the offending argument (1 = M, 2 = N, 5 = INCX, 7 = INCY, 9 = LDA),
    and the routine then returns without touching A.


    Level 2 Blas routine.

    -- Written on 22-October-1986.
       Jack Dongarra, Argonne National Lab.
       Jeremy Du Croz, Nag Central Office.
       Sven Hammarling, Nag Central Office.
       Richard Hanson, Sandia National Labs.
*/

/* 1-based (Fortran style) accessors; A is stored column by column with
   leading dimension *lda. */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

/*     Test the input parameters. */

    info = 0;
    if (*m < 0) {
        info = 1;
    } else if (*n < 0) {
        info = 2;
    } else if (*incx == 0) {
        info = 5;
    } else if (*incy == 0) {
        info = 7;
    } else if (*lda < max(1,*m)) {
        info = 9;
    }
    if (info != 0) {
        input_error_dist("ZGERU ", &info);
        return 0;
    }

/*     Quick return if possible: empty matrix or alpha == 0. */

    if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0.)) {
        return 0;
    }

/*     Start the operations. In this version the elements of A are
       accessed sequentially with one pass through A. */

    /* With a negative increment the vector is traversed backwards, so the
       first logical element lives at the far end of the storage. */
    if (*incy > 0) {
        jy = 1;
    } else {
        jy = 1 - (*n - 1) * *incy;
    }

    if (*incx == 1) {

        /* Unit stride in x: index x directly by the row number. */
        for (j = 1; j <= *n; ++j) {
            /* A zero y(j) contributes nothing to column j. */
            if (Y(jy).r != 0. || Y(jy).i != 0.) {
                /* temp = alpha * y(j) */
                temp.r = alpha->r * Y(jy).r - alpha->i * Y(jy).i;
                temp.i = alpha->r * Y(jy).i + alpha->i * Y(jy).r;
                for (i = 1; i <= *m; ++i) {
                    /* A(i,j) += x(i) * temp */
                    prod.r = X(i).r * temp.r - X(i).i * temp.i;
                    prod.i = X(i).r * temp.i + X(i).i * temp.r;
                    A(i,j).r += prod.r;
                    A(i,j).i += prod.i;
                }
            }
            jy += *incy;
        }

    } else {

        /* General stride in x: walk x with its own running index. */
        if (*incx > 0) {
            kx = 1;
        } else {
            kx = 1 - (*m - 1) * *incx;
        }
        for (j = 1; j <= *n; ++j) {
            if (Y(jy).r != 0. || Y(jy).i != 0.) {
                /* temp = alpha * y(j) */
                temp.r = alpha->r * Y(jy).r - alpha->i * Y(jy).i;
                temp.i = alpha->r * Y(jy).i + alpha->i * Y(jy).r;
                ix = kx;
                for (i = 1; i <= *m; ++i) {
                    /* A(i,j) += x(i) * temp */
                    prod.r = X(ix).r * temp.r - X(ix).i * temp.i;
                    prod.i = X(ix).r * temp.i + X(ix).i * temp.r;
                    A(i,j).r += prod.r;
                    A(i,j).i += prod.i;
                    ix += *incx;
                }
            }
            jy += *incy;
        }
    }

    return 0;

/*     End of ZGERU . */

} /* zgeru_ */

SuperLU_DIST_5.3.0/CBLAS/zher2.c0000644013363400111340000003062413233431301014663 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include <string.h>
#include "f2c.h"

/* Subroutine */ int zher2_(char *uplo, integer *n, doublecomplex *alpha, 
	doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
	doublecomplex *a, integer *lda)
{
    /* Builtin functions */
    void d_cnjg(doublecomplex *, doublecomplex *);

    /* Local variables.  All of them are automatic: the routine keeps no
       state between calls. */
    integer info;
    int upper, lower, unit;
    doublecomplex temp1, temp2, ycj, ax, xt, yt;
    integer i, j, ilo, ihi;
    integer ix, iy, jx, jy, kx, ky;
    extern /* Subroutine */ int input_error_dist(char *, integer *);


/*  Purpose
    =======

    ZHER2  performs the hermitian rank 2 operation

       A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,

    where alpha is a scalar, x and y are n element vectors and A is an n
    by n hermitian matrix.

    Parameters
    ==========

    UPLO   - CHARACTER*1.
             Selects which triangle of A is referenced; only the first
             character is examined:

                UPLO = 'U' or 'u'   Only the upper triangular part of A
                                    is to be referenced.

                UPLO = 'L' or 'l'   Only the lower triangular part of A
                                    is to be referenced.

             Unchanged on exit.

    N      - INTEGER.
             Order of the matrix A.  N must be at least zero.
             Unchanged on exit.

    ALPHA  - COMPLEX*16.
             The scalar alpha.  Unchanged on exit.

    X      - COMPLEX*16 array of dimension at least
             ( 1 + ( n - 1 )*abs( INCX ) ).
             Before entry, the incremented array X must contain the n
             element vector x.  Unchanged on exit.

    INCX   - INTEGER.
             Increment for the elements of X.  INCX must not be zero.
             Unchanged on exit.

    Y      - COMPLEX*16 array of dimension at least
             ( 1 + ( n - 1 )*abs( INCY ) ).
             Before entry, the incremented array Y must contain the n
             element vector y.  Unchanged on exit.

    INCY   - INTEGER.
             Increment for the elements of Y.  INCY must not be zero.
             Unchanged on exit.

    A      - COMPLEX*16 array of DIMENSION ( LDA, n ).
             Before entry with UPLO = 'U' or 'u', the leading n by n
             upper triangular part of the array A must contain the upper
             triangular part of the hermitian matrix and the strictly
             lower triangular part of A is not referenced.  On exit, the
             upper triangular part of the array A is overwritten by the
             upper triangular part of the updated matrix.
             Before entry with UPLO = 'L' or 'l', the leading n by n
             lower triangular part of the array A must contain the lower
             triangular part of the hermitian matrix and the strictly
             upper triangular part of A is not referenced.  On exit, the
             lower triangular part of the array A is overwritten by the
             lower triangular part of the updated matrix.
             Note that the imaginary parts of the diagonal elements need
             not be set, they are assumed to be zero, and on exit they
             are set to zero (except on the quick-return path below,
             where A is left untouched).

    LDA    - INTEGER.
             First dimension of A as declared in the calling (sub)
             program.  LDA must be at least max( 1, n ).
             Unchanged on exit.

    Return value
    ============

    Always 0.  An invalid argument is reported through
    input_error_dist("ZHER2 ", &info), where info is the 1-based position
    of the offending argument (1 = UPLO, 2 = N, 5 = INCX, 7 = INCY,
    9 = LDA), and the routine then returns without touching A.


    Level 2 Blas routine.

    -- Written on 22-October-1986.
       Jack Dongarra, Argonne National Lab.
       Jeremy Du Croz, Nag Central Office.
       Sven Hammarling, Nag Central Office.
       Richard Hanson, Sandia National Labs.
*/

/* 1-based (Fortran style) accessors; A is stored column by column with
   leading dimension *lda. */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

/*     Test the input parameters.  UPLO is matched on its first character
       in either case, as documented above. */

    upper = (*uplo == 'U' || *uplo == 'u');
    lower = (*uplo == 'L' || *uplo == 'l');

    info = 0;
    if (!upper && !lower) {
        info = 1;
    } else if (*n < 0) {
        info = 2;
    } else if (*incx == 0) {
        info = 5;
    } else if (*incy == 0) {
        info = 7;
    } else if (*lda < max(1,*n)) {
        info = 9;
    }
    if (info != 0) {
        input_error_dist("ZHER2 ", &info);
        return 0;
    }

/*     Quick return if possible: empty matrix or alpha == 0. */

    if (*n == 0 || (alpha->r == 0. && alpha->i == 0.)) {
        return 0;
    }

/*     Set up the start points in X and Y.  With a negative increment the
       vector is traversed backwards, so its first logical element lives
       at the far end of the storage.  For unit increments both start
       points are 1, so jx and jy simply track the column index j. */

    unit = (*incx == 1 && *incy == 1);
    if (*incx > 0) {
        kx = 1;
    } else {
        kx = 1 - (*n - 1) * *incx;
    }
    if (*incy > 0) {
        ky = 1;
    } else {
        ky = 1 - (*n - 1) * *incy;
    }
    jx = kx;
    jy = ky;

/*     Start the operations. In this version the elements of A are
       accessed sequentially with one pass through the triangular part
       of A. */

    for (j = 1; j <= *n; ++j) {

        if (X(jx).r != 0. || X(jx).i != 0. ||
            Y(jy).r != 0. || Y(jy).i != 0.) {

            /* temp1 = alpha * conjg( y(j) ) */
            d_cnjg(&ycj, &Y(jy));
            temp1.r = alpha->r * ycj.r - alpha->i * ycj.i;
            temp1.i = alpha->r * ycj.i + alpha->i * ycj.r;

            /* temp2 = conjg( alpha * x(j) ) */
            ax.r = alpha->r * X(jx).r - alpha->i * X(jx).i;
            ax.i = alpha->r * X(jx).i + alpha->i * X(jx).r;
            d_cnjg(&temp2, &ax);

            /* Off-diagonal rows of column j inside the stored triangle:
               1..j-1 (starting at the head of x and y) for the upper
               triangle, j+1..n (starting one step past element j) for
               the lower triangle. */
            if (upper) {
                ilo = 1;
                ihi = j - 1;
                ix = kx;
                iy = ky;
            } else {
                ilo = j + 1;
                ihi = *n;
                ix = jx + *incx;
                iy = jy + *incy;
            }

            if (unit) {
                for (i = ilo; i <= ihi; ++i) {
                    /* A(i,j) = ( A(i,j) + x(i)*temp1 ) + y(i)*temp2 */
                    xt.r = X(i).r * temp1.r - X(i).i * temp1.i;
                    xt.i = X(i).r * temp1.i + X(i).i * temp1.r;
                    yt.r = Y(i).r * temp2.r - Y(i).i * temp2.i;
                    yt.i = Y(i).r * temp2.i + Y(i).i * temp2.r;
                    A(i,j).r = (A(i,j).r + xt.r) + yt.r;
                    A(i,j).i = (A(i,j).i + xt.i) + yt.i;
                }
            } else {
                for (i = ilo; i <= ihi; ++i) {
                    /* A(i,j) = ( A(i,j) + x(i)*temp1 ) + y(i)*temp2 */
                    xt.r = X(ix).r * temp1.r - X(ix).i * temp1.i;
                    xt.i = X(ix).r * temp1.i + X(ix).i * temp1.r;
                    yt.r = Y(iy).r * temp2.r - Y(iy).i * temp2.i;
                    yt.i = Y(iy).r * temp2.i + Y(iy).i * temp2.r;
                    A(i,j).r = (A(i,j).r + xt.r) + yt.r;
                    A(i,j).i = (A(i,j).i + xt.i) + yt.i;
                    ix += *incx;
                    iy += *incy;
                }
            }

            /* Diagonal element: only the real part of
               x(j)*temp1 + y(j)*temp2 is added, and the imaginary part
               is forced to zero so the diagonal stays real. */
            xt.r = X(jx).r * temp1.r - X(jx).i * temp1.i;
            yt.r = Y(jy).r * temp2.r - Y(jy).i * temp2.i;
            A(j,j).r = A(j,j).r + (xt.r + yt.r);
            A(j,j).i = 0.;

        } else {

            /* Nothing to add in this column; still zero the imaginary
               part of the diagonal element. */
            A(j,j).i = 0.;
        }

        jx += *incx;
        jy += *incy;
    }

    return 0;

/*     End of ZHER2 . */

} /* zher2_ */

SuperLU_DIST_5.3.0/CBLAS/zhemv.c0000644013363400111340000002733413233431301014766 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/
#include <string.h>
#include "f2c.h"

/* Subroutine */ int zhemv_(char *uplo, integer *n, doublecomplex *alpha, 
	doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, 
	doublecomplex *beta, doublecomplex *y, integer *incy)
{


    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1;
    doublecomplex z__1, z__2, z__3, z__4;

    /* Builtin functions */
    void d_cnjg(doublecomplex *, doublecomplex *);

    /* Local variables */
    static integer info;
    static doublecomplex temp1, temp2;
    static integer i, j;
    static integer ix, iy, jx, jy, kx, ky;
    extern /* Subroutine */ int input_error_dist(char *, integer *);


/*  Purpose   
    =======   

    ZHEMV  performs the matrix-vector  operation   

       y := alpha*A*x + beta*y,   

    where alpha and beta are scalars, x and y are n element vectors and   
    A is an n by n hermitian matrix.   

    Parameters   
    ==========   

    UPLO   - CHARACTER*1.   
             On entry, UPLO specifies whether the upper or lower   
             triangular part of the array A is to be referenced as   
             follows:   

                UPLO = 'U' or 'u'   Only the upper triangular part of A   
                                    is to be referenced.   

                UPLO = 'L' or 'l'   Only the lower triangular part of A   
                                    is to be referenced.   

             Unchanged on exit.   

    N      - INTEGER.   
             On entry, N specifies the order of the matrix A.   
             N must be at least zero.   
             Unchanged on exit.   

    ALPHA  - COMPLEX*16      .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
             Before entry with  UPLO = 'U' or 'u', the leading n by n
             upper triangular part of the array A must contain the upper
             triangular part of the hermitian matrix and the strictly
             lower triangular part of A is not referenced.
             Before entry with UPLO = 'L' or 'l', the leading n by n
             lower triangular part of the array A must contain the lower
             triangular part of the hermitian matrix and the strictly
             upper triangular part of A is not referenced.
             Note that the imaginary parts of the diagonal elements need
             not be set and are assumed to be zero.
             Unchanged on exit.

    LDA    - INTEGER.
             On entry, LDA specifies the first dimension of A as declared
             in the calling (sub) program. LDA must be at least
             max( 1, n ).
             Unchanged on exit.

    X      - COMPLEX*16       array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCX ) ).   
             Before entry, the incremented array X must contain the n   
             element vector x.   
             Unchanged on exit.   

    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   

    BETA   - COMPLEX*16      .   
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   
             Unchanged on exit.   

    Y      - COMPLEX*16       array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCY ) ).   
             Before entry, the incremented array Y must contain the n   
             element vector y. On exit, Y is overwritten by the updated   
             vector y.   

    INCY   - INTEGER.   
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   
             Unchanged on exit.   


    Level 2 Blas routine.   

    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   



       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    info = 0;
    if (strncmp(uplo, "U", 1)!=0 && strncmp(uplo, "L", 1)!=0) {
	info = 1;
    } else if (*n < 0) {
	info = 2;
    } else if (*lda < max(1,*n)) {
	info = 5;
    } else if (*incx == 0) {
	info = 7;
    } else if (*incy == 0) {
	info = 10;
    }
    if (info != 0) {
	input_error_dist("ZHEMV ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
	    beta->i == 0.)) {
	return 0;
    }

/*     Set up the start points in  X  and  Y. */

    if (*incx > 0) {
	kx = 1;
    } else {
	kx = 1 - (*n - 1) * *incx;
    }
    if (*incy > 0) {
	ky = 1;
    } else {
	ky = 1 - (*n - 1) * *incy;
    }

/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through the triangular part   
       of A.   

       First form  y := beta*y. */

    if (beta->r != 1. || beta->i != 0.) {
	if (*incy == 1) {
	    if (beta->r == 0. && beta->i == 0.) {
		i__1 = *n;
		for (i = 1; i <= *n; ++i) {
		    i__2 = i;
		    Y(i).r = 0., Y(i).i = 0.;
/* L10: */
		}
	    } else {
		i__1 = *n;
		for (i = 1; i <= *n; ++i) {
		    i__2 = i;
		    i__3 = i;
		    z__1.r = beta->r * Y(i).r - beta->i * Y(i).i, 
			    z__1.i = beta->r * Y(i).i + beta->i * Y(i)
			    .r;
		    Y(i).r = z__1.r, Y(i).i = z__1.i;
/* L20: */
		}
	    }
	} else {
	    iy = ky;
	    if (beta->r == 0. && beta->i == 0.) {
		i__1 = *n;
		for (i = 1; i <= *n; ++i) {
		    i__2 = iy;
		    Y(iy).r = 0., Y(iy).i = 0.;
		    iy += *incy;
/* L30: */
		}
	    } else {
		i__1 = *n;
		for (i = 1; i <= *n; ++i) {
		    i__2 = iy;
		    i__3 = iy;
		    z__1.r = beta->r * Y(iy).r - beta->i * Y(iy).i, 
			    z__1.i = beta->r * Y(iy).i + beta->i * Y(iy)
			    .r;
		    Y(iy).r = z__1.r, Y(iy).i = z__1.i;
		    iy += *incy;
/* L40: */
		}
	    }
	}
    }
    if (alpha->r == 0. && alpha->i == 0.) {
	return 0;
    }
    if (strncmp(uplo, "U", 1)==0) {

/*        Form  y  when A is stored in upper triangle. */

	if (*incx == 1 && *incy == 1) {
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = j;
		z__1.r = alpha->r * X(j).r - alpha->i * X(j).i, z__1.i =
			 alpha->r * X(j).i + alpha->i * X(j).r;
		temp1.r = z__1.r, temp1.i = z__1.i;
		temp2.r = 0., temp2.i = 0.;
		i__2 = j - 1;
		for (i = 1; i <= j-1; ++i) {
		    i__3 = i;
		    i__4 = i;
		    i__5 = i + j * a_dim1;
		    z__2.r = temp1.r * A(i,j).r - temp1.i * A(i,j).i, 
			    z__2.i = temp1.r * A(i,j).i + temp1.i * A(i,j)
			    .r;
		    z__1.r = Y(i).r + z__2.r, z__1.i = Y(i).i + z__2.i;
		    Y(i).r = z__1.r, Y(i).i = z__1.i;
		    d_cnjg(&z__3, &A(i,j));
		    i__3 = i;
		    z__2.r = z__3.r * X(i).r - z__3.i * X(i).i, z__2.i =
			     z__3.r * X(i).i + z__3.i * X(i).r;
		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
		    temp2.r = z__1.r, temp2.i = z__1.i;
/* L50: */
		}
		i__2 = j;
		i__3 = j;
		i__4 = j + j * a_dim1;
		d__1 = A(j,j).r;
		z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
		z__2.r = Y(j).r + z__3.r, z__2.i = Y(j).i + z__3.i;
		z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
			alpha->r * temp2.i + alpha->i * temp2.r;
		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
		Y(j).r = z__1.r, Y(j).i = z__1.i;
/* L60: */
	    }
	} else {
	    jx = kx;
	    jy = ky;
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = jx;
		z__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, z__1.i =
			 alpha->r * X(jx).i + alpha->i * X(jx).r;
		temp1.r = z__1.r, temp1.i = z__1.i;
		temp2.r = 0., temp2.i = 0.;
		ix = kx;
		iy = ky;
		i__2 = j - 1;
		for (i = 1; i <= j-1; ++i) {
		    i__3 = iy;
		    i__4 = iy;
		    i__5 = i + j * a_dim1;
		    z__2.r = temp1.r * A(i,j).r - temp1.i * A(i,j).i, 
			    z__2.i = temp1.r * A(i,j).i + temp1.i * A(i,j)
			    .r;
		    z__1.r = Y(iy).r + z__2.r, z__1.i = Y(iy).i + z__2.i;
		    Y(iy).r = z__1.r, Y(iy).i = z__1.i;
		    d_cnjg(&z__3, &A(i,j));
		    i__3 = ix;
		    z__2.r = z__3.r * X(ix).r - z__3.i * X(ix).i, z__2.i =
			     z__3.r * X(ix).i + z__3.i * X(ix).r;
		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
		    temp2.r = z__1.r, temp2.i = z__1.i;
		    ix += *incx;
		    iy += *incy;
/* L70: */
		}
		i__2 = jy;
		i__3 = jy;
		i__4 = j + j * a_dim1;
		d__1 = A(j,j).r;
		z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
		z__2.r = Y(jy).r + z__3.r, z__2.i = Y(jy).i + z__3.i;
		z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
			alpha->r * temp2.i + alpha->i * temp2.r;
		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
		Y(jy).r = z__1.r, Y(jy).i = z__1.i;
		jx += *incx;
		jy += *incy;
/* L80: */
	    }
	}
    } else {

/*        Form  y  when A is stored in lower triangle. */

	if (*incx == 1 && *incy == 1) {
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = j;
		z__1.r = alpha->r * X(j).r - alpha->i * X(j).i, z__1.i =
			 alpha->r * X(j).i + alpha->i * X(j).r;
		temp1.r = z__1.r, temp1.i = z__1.i;
		temp2.r = 0., temp2.i = 0.;
		i__2 = j;
		i__3 = j;
		i__4 = j + j * a_dim1;
		d__1 = A(j,j).r;
		z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
		z__1.r = Y(j).r + z__2.r, z__1.i = Y(j).i + z__2.i;
		Y(j).r = z__1.r, Y(j).i = z__1.i;
		i__2 = *n;
		for (i = j + 1; i <= *n; ++i) {
		    i__3 = i;
		    i__4 = i;
		    i__5 = i + j * a_dim1;
		    z__2.r = temp1.r * A(i,j).r - temp1.i * A(i,j).i, 
			    z__2.i = temp1.r * A(i,j).i + temp1.i * A(i,j)
			    .r;
		    z__1.r = Y(i).r + z__2.r, z__1.i = Y(i).i + z__2.i;
		    Y(i).r = z__1.r, Y(i).i = z__1.i;
		    d_cnjg(&z__3, &A(i,j));
		    i__3 = i;
		    z__2.r = z__3.r * X(i).r - z__3.i * X(i).i, z__2.i =
			     z__3.r * X(i).i + z__3.i * X(i).r;
		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
		    temp2.r = z__1.r, temp2.i = z__1.i;
/* L90: */
		}
		i__2 = j;
		i__3 = j;
		z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
			alpha->r * temp2.i + alpha->i * temp2.r;
		z__1.r = Y(j).r + z__2.r, z__1.i = Y(j).i + z__2.i;
		Y(j).r = z__1.r, Y(j).i = z__1.i;
/* L100: */
	    }
	} else {
	    jx = kx;
	    jy = ky;
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = jx;
		z__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, z__1.i =
			 alpha->r * X(jx).i + alpha->i * X(jx).r;
		temp1.r = z__1.r, temp1.i = z__1.i;
		temp2.r = 0., temp2.i = 0.;
		i__2 = jy;
		i__3 = jy;
		i__4 = j + j * a_dim1;
		d__1 = A(j,j).r;
		z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
		z__1.r = Y(jy).r + z__2.r, z__1.i = Y(jy).i + z__2.i;
		Y(jy).r = z__1.r, Y(jy).i = z__1.i;
		ix = jx;
		iy = jy;
		i__2 = *n;
		for (i = j + 1; i <= *n; ++i) {
		    ix += *incx;
		    iy += *incy;
		    i__3 = iy;
		    i__4 = iy;
		    i__5 = i + j * a_dim1;
		    z__2.r = temp1.r * A(i,j).r - temp1.i * A(i,j).i, 
			    z__2.i = temp1.r * A(i,j).i + temp1.i * A(i,j)
			    .r;
		    z__1.r = Y(iy).r + z__2.r, z__1.i = Y(iy).i + z__2.i;
		    Y(iy).r = z__1.r, Y(iy).i = z__1.i;
		    d_cnjg(&z__3, &A(i,j));
		    i__3 = ix;
		    z__2.r = z__3.r * X(ix).r - z__3.i * X(ix).i, z__2.i =
			     z__3.r * X(ix).i + z__3.i * X(ix).r;
		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
		    temp2.r = z__1.r, temp2.i = z__1.i;
/* L110: */
		}
		i__2 = jy;
		i__3 = jy;
		z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
			alpha->r * temp2.i + alpha->i * temp2.r;
		z__1.r = Y(jy).r + z__2.r, z__1.i = Y(jy).i + z__2.i;
		Y(jy).r = z__1.r, Y(jy).i = z__1.i;
		jx += *incx;
		jy += *incy;
/* L120: */
	    }
	}
    }

    return 0;

/*     End of ZHEMV . */

} /* zhemv_ */

SuperLU_DIST_5.3.0/CBLAS/idamax.c0000644013363400111340000000270613233431301015074 0ustar  xiaoyessg
/*  -- translated by f2c (version 19940927).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

integer idamax_(integer *n, doublereal *dx, integer *incx)
{
/*  Purpose
    =======

    IDAMAX finds the index of the element having max. absolute value.
    When several elements share that value, the smallest index is
    returned.

    jack dongarra, linpack, 3/11/78.
    modified 3/93 to return if incx .le. 0.
    modified 12/3/93, array(1) declarations changed to array(*)

    Arguments
    =========

    n     (input) number of elements to examine.

    dx    (input) the vector; logical element i (1-based) is read from
                  dx[(i-1) * incx].

    incx  (input) stride between consecutive logical elements of dx.

    Return value
    ============

    0 when n < 1 or incx <= 0, otherwise the 1-based index (1..n) of the
    selected element.
*/

    /* Automatic storage: the f2c translation declared these locals
       `static`, which shared scratch state between calls and made the
       routine non-reentrant. */
    integer best, i, ix;
    doublereal vmax, v;

    if (*n < 1 || *incx <= 0) {
	return 0;
    }
    if (*n == 1) {
	/* Single element: answer is known without touching dx. */
	return 1;
    }

    best = 1;
    vmax = dx[0] >= 0 ? dx[0] : -dx[0];

    /* One loop serves both the unit-stride and the strided case; ix is
       the 0-based offset of logical element i. */
    ix = *incx;
    for (i = 2; i <= *n; ++i) {
	v = dx[ix];
	v = v >= 0 ? v : -v;
	/* Written as !(<=) rather than (>) so that the comparison behaves
	   exactly as the original skip-if-less-or-equal test, including
	   when a NaN is involved. */
	if (!(v <= vmax)) {
	    best = i;
	    vmax = v;
	}
	ix += *incx;
    }
    return best;
} /* idamax_ */

SuperLU_DIST_5.3.0/CBLAS/CMakeLists.txt0000644013363400111340000000142713233431301016224 0ustar  xiaoyessgset(headers 
    f2c.h
)
# Source list for the bundled BLAS library.  input_error_dist.c is always
# built; the precision-specific routines are appended below depending on
# which precisions are enabled.
set(sources input_error_dist.c)

# Double-precision real routines.
if (enable_double)
    list(APPEND sources
      idamax.c
      dasum.c
      daxpy.c
      dcopy.c
      ddot.c
      dnrm2.c
      drot.c
      dscal.c
      dgemv.c
      dsymv.c
      dtrsv.c
      dger.c
      dsyr2.c
      dgemm.c
      dtrsm.c
    )
endif()

# Double-precision complex routines.
if (enable_complex16)
    list(APPEND sources
      izamax.c
      dzasum.c
      zaxpy.c
      zcopy.c
      dznrm2.c
      zscal.c
      dcabs1.c
      z_internal.c
      zgemv.c
      zhemv.c
      ztrsv.c
      zgerc.c
      zgeru.c
      zher2.c
      zgemm.c
      ztrsm.c
    )
endif()

# CMake variable names are case-sensitive: the header list is defined as
# `headers` (lowercase), so it has to be referenced with that spelling.
# `${HEADERS}` expanded to nothing.
add_library(blas ${sources} ${headers})

install(TARGETS blas DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
install(FILES ${headers} DESTINATION ${CMAKE_INSTALL_PREFIX}/include)
SuperLU_DIST_5.3.0/CBLAS/input_error_dist.c0000644013363400111340000000154513233431301017224 0ustar  xiaoyessg#include 

/*! @file input_error_dist.c
 * \brief Error handler for input parameters.
 *
 * 
 * -- SuperLU routine (version 4.4) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * November 20, 2012
 * 
*/ /*! \brief * *
 * Purpose   
 * =======   
 *
 * INPUT_ERROR is called if an input parameter has an
 * invalid value.  A message is printed and the routine returns 0;
 * execution does not stop.
 *
 * Arguments   
 * =========   
 *
 * srname  (input) character*6
 *         The name of the routine which called INPUT_ERROR.
 *
 * info    (input) int
 *         The position of the invalid parameter in the parameter list   
 *         of the calling routine.
 *
 * 
*/ int input_error_dist(char *srname, int *info) { printf("** On entry to %6s, parameter number %2d had an illegal value\n", srname, *info); return 0; } SuperLU_DIST_5.3.0/CBLAS/z_internal.c0000644013363400111340000000161613233431301015775 0ustar xiaoyessg#include #include #include "f2c.h" /* Complex Division c = a/b */ void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) { double ratio, den; double abr, abi, cr, ci; if( (abr = b->r) < 0.) abr = - abr; if( (abi = b->i) < 0.) abi = - abi; if( abr <= abi ) { if (abi == 0) { fprintf(stderr, "z_div.c: division by zero"); exit(-1); } ratio = b->r / b->i ; den = b->i * (1 + ratio*ratio); cr = (a->r*ratio + a->i) / den; ci = (a->i*ratio - a->r) / den; } else { ratio = b->i / b->r ; den = b->r * (1 + ratio*ratio); cr = (a->r + a->i*ratio) / den; ci = (a->i - a->r*ratio) / den; } c->r = cr; c->i = ci; } /* Return the complex conjugate */ void d_cnjg(doublecomplex *r, doublecomplex *z) { r->r = z->r; r->i = -z->i; } /* Return the imaginary part */ double d_imag(doublecomplex *z) { return (z->i); } SuperLU_DIST_5.3.0/CBLAS/snrm2.c0000644013363400111340000000321313233431301014664 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" real snrm2_(integer *n, real *x, integer *incx) { /* System generated locals */ integer i__1, i__2; real ret_val, r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static real norm, scale, absxi; static integer ix; static real ssq; /* SNRM2 returns the euclidean norm of a vector via the function name, so that SNRM2 := sqrt( x'*x ) -- This version written on 25-October-1982. Modified on 14-October-1993 to inline the call to SLASSQ. Sven Hammarling, Nag Ltd. 
Parameter adjustments Function Body */ #define X(I) x[(I)-1] if (*n < 1 || *incx < 1) { norm = 0.f; } else if (*n == 1) { norm = dabs(X(1)); } else { scale = 0.f; ssq = 1.f; /* The following loop is equivalent to this call to the LAPACK auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */ i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; *incx < 0 ? ix >= (*n-1)**incx+1 : ix <= (*n-1)**incx+1; ix += *incx) { if (X(ix) != 0.f) { absxi = (r__1 = X(ix), dabs(r__1)); if (scale < absxi) { /* Computing 2nd power */ r__1 = scale / absxi; ssq = ssq * (r__1 * r__1) + 1.f; scale = absxi; } else { /* Computing 2nd power */ r__1 = absxi / scale; ssq += r__1 * r__1; } } /* L10: */ } norm = scale * sqrt(ssq); } ret_val = norm; return ret_val; /* End of SNRM2. */ } /* snrm2_ */ SuperLU_DIST_5.3.0/CBLAS/scnrm2.c0000644013363400111340000000373513233431301015040 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" real scnrm2_(integer *n, complex *x, integer *incx) { /* System generated locals */ integer i__1, i__2, i__3; real ret_val, r__1; /* Builtin functions */ double r_imag(complex *), sqrt(doublereal); /* Local variables */ static real temp, norm, scale; static integer ix; static real ssq; /* SCNRM2 returns the euclidean norm of a vector via the function name, so that SCNRM2 := sqrt( conjg( x' )*x ) -- This version written on 25-October-1982. Modified on 14-October-1993 to inline the call to CLASSQ. Sven Hammarling, Nag Ltd. Parameter adjustments Function Body */ #define X(I) x[(I)-1] if (*n < 1 || *incx < 1) { norm = 0.f; } else { scale = 0.f; ssq = 1.f; /* The following loop is equivalent to this call to the LAPACK auxiliary routine: CALL CLASSQ( N, X, INCX, SCALE, SSQ ) */ i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; *incx < 0 ? 
ix >= (*n-1)**incx+1 : ix <= (*n-1)**incx+1; ix += *incx) { i__3 = ix; if (X(ix).r != 0.f) { i__3 = ix; temp = (r__1 = X(ix).r, dabs(r__1)); if (scale < temp) { /* Computing 2nd power */ r__1 = scale / temp; ssq = ssq * (r__1 * r__1) + 1.f; scale = temp; } else { /* Computing 2nd power */ r__1 = temp / scale; ssq += r__1 * r__1; } } if (r_imag(&X(ix)) != 0.f) { temp = (r__1 = r_imag(&X(ix)), dabs(r__1)); if (scale < temp) { /* Computing 2nd power */ r__1 = scale / temp; ssq = ssq * (r__1 * r__1) + 1.f; scale = temp; } else { /* Computing 2nd power */ r__1 = temp / scale; ssq += r__1 * r__1; } } /* L10: */ } norm = scale * sqrt(ssq); } ret_val = norm; return ret_val; /* End of SCNRM2. */ } /* scnrm2_ */ SuperLU_DIST_5.3.0/CBLAS/sscal.c0000644013363400111340000000300213233431301014724 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx) { /* System generated locals */ integer i__1, i__2; /* Local variables */ static integer i, m, nincx, mp1; /* scales a vector by a constant. uses unrolled loops for increment equal to 1. jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define SX(I) sx[(I)-1] if (*n <= 0 || *incx <= 0) { return 0; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ nincx = *n * *incx; i__1 = nincx; i__2 = *incx; for (i = 1; *incx < 0 ? 
i >= nincx : i <= nincx; i += *incx) { SX(i) = *sa * SX(i); /* L10: */ } return 0; /* code for increment equal to 1 clean-up loop */ L20: m = *n % 5; if (m == 0) { goto L40; } i__2 = m; for (i = 1; i <= m; ++i) { SX(i) = *sa * SX(i); /* L30: */ } if (*n < 5) { return 0; } L40: mp1 = m + 1; i__2 = *n; for (i = mp1; i <= *n; i += 5) { SX(i) = *sa * SX(i); SX(i + 1) = *sa * SX(i + 1); SX(i + 2) = *sa * SX(i + 2); SX(i + 3) = *sa * SX(i + 3); SX(i + 4) = *sa * SX(i + 4); /* L50: */ } return 0; } /* sscal_ */ SuperLU_DIST_5.3.0/CBLAS/zscal.c0000644013363400111340000000254213233431301014743 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int zscal_(integer *n, doublecomplex *za, doublecomplex *zx, integer *incx) { /* System generated locals */ integer i__1, i__2, i__3; doublecomplex z__1; /* Local variables */ static integer i, ix; /* scales a vector by a constant. jack dongarra, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define ZX(I) zx[(I)-1] if (*n <= 0 || *incx <= 0) { return 0; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ ix = 1; i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = ix; i__3 = ix; z__1.r = za->r * ZX(ix).r - za->i * ZX(ix).i, z__1.i = za->r * ZX( ix).i + za->i * ZX(ix).r; ZX(ix).r = z__1.r, ZX(ix).i = z__1.i; ix += *incx; /* L10: */ } return 0; /* code for increment equal to 1 */ L20: i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = i; i__3 = i; z__1.r = za->r * ZX(i).r - za->i * ZX(i).i, z__1.i = za->r * ZX( i).i + za->i * ZX(i).r; ZX(i).r = z__1.r, ZX(i).i = z__1.i; /* L30: */ } return 0; } /* zscal_ */ SuperLU_DIST_5.3.0/CBLAS/ssyr2.c0000644013363400111340000001512413233431301014711 0ustar xiaoyessg /* -- translated by f2c (version 19940927). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer info; static real temp1, temp2; static integer i, j; static integer ix, iy, jx, jy, kx, ky; extern /* Subroutine */ int input_error_dist(char *, integer *); /* Purpose ======= SSYR2 performs the symmetric rank 2 operation A := alpha*x*y' + alpha*y*x' + A, where alpha is a scalar, x and y are n element vectors and A is an n by n symmetric matrix. Parameters ========== UPLO - CHARACTER*1. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: UPLO = 'U' or 'u' Only the upper triangular part of A is to be referenced. UPLO = 'L' or 'l' Only the lower triangular part of A is to be referenced. Unchanged on exit. N - INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - REAL . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. X - REAL array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Y - REAL array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. Unchanged on exit. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. A - REAL array of DIMENSION ( LDA, n ). 
Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. On exit, the upper triangular part of the array A is overwritten by the upper triangular part of the updated matrix. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. On exit, the lower triangular part of the array A is overwritten by the lower triangular part of the updated matrix. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, n ). Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (strncmp(uplo, "U", 1)!=0 && strncmp(uplo, "L", 1)!=0) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*n)) { info = 9; } if (info != 0) { input_error_dist("SSYR2 ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.f) { return 0; } /* Set up the start points in X and Y if the increments are not both unity. */ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through the triangular part of A. 
*/ if (strncmp(uplo, "U", 1)==0) { /* Form A when A is stored in the upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { if (X(j) != 0.f || Y(j) != 0.f) { temp1 = *alpha * Y(j); temp2 = *alpha * X(j); i__2 = j; for (i = 1; i <= j; ++i) { A(i,j) = A(i,j) + X(i) * temp1 + Y(i) * temp2; /* L10: */ } } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= *n; ++j) { if (X(jx) != 0.f || Y(jy) != 0.f) { temp1 = *alpha * Y(jy); temp2 = *alpha * X(jx); ix = kx; iy = ky; i__2 = j; for (i = 1; i <= j; ++i) { A(i,j) = A(i,j) + X(ix) * temp1 + Y(iy) * temp2; ix += *incx; iy += *incy; /* L30: */ } } jx += *incx; jy += *incy; /* L40: */ } } } else { /* Form A when A is stored in the lower triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { if (X(j) != 0.f || Y(j) != 0.f) { temp1 = *alpha * Y(j); temp2 = *alpha * X(j); i__2 = *n; for (i = j; i <= *n; ++i) { A(i,j) = A(i,j) + X(i) * temp1 + Y(i) * temp2; /* L50: */ } } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= *n; ++j) { if (X(jx) != 0.f || Y(jy) != 0.f) { temp1 = *alpha * Y(jy); temp2 = *alpha * X(jx); ix = jx; iy = jy; i__2 = *n; for (i = j; i <= *n; ++i) { A(i,j) = A(i,j) + X(ix) * temp1 + Y(iy) * temp2; ix += *incx; iy += *incy; /* L70: */ } } jx += *incx; jy += *incy; /* L80: */ } } } return 0; /* End of SSYR2 . */ } /* ssyr2_ */ SuperLU_DIST_5.3.0/CBLAS/ssymv.c0000644013363400111340000001612713233431301015014 0ustar xiaoyessg /* -- translated by f2c (version 19940927). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, real *y, integer * incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer info; static real temp1, temp2; static integer i, j; static integer ix, iy, jx, jy, kx, ky; extern /* Subroutine */ int input_error_dist(char *, integer *); /* Purpose ======= SSYMV performs the matrix-vector operation y := alpha*A*x + beta*y, where alpha and beta are scalars, x and y are n element vectors and A is an n by n symmetric matrix. Parameters ========== UPLO - CHARACTER*1. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: UPLO = 'U' or 'u' Only the upper triangular part of A is to be referenced. UPLO = 'L' or 'l' Only the lower triangular part of A is to be referenced. Unchanged on exit. N - INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - REAL . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. A - REAL array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, n ). Unchanged on exit. X - REAL array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). 
Before entry, the incremented array X must contain the n element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. BETA - REAL . On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. Unchanged on exit. Y - REAL array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (strncmp(uplo, "U", 1)!=0 && strncmp(uplo, "L", 1)!=0) { info = 1; } else if (*n < 0) { info = 2; } else if (*lda < max(1,*n)) { info = 5; } else if (*incx == 0) { info = 7; } else if (*incy == 0) { info = 10; } if (info != 0) { input_error_dist("SSYMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.f && *beta == 1.f) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through the triangular part of A. First form y := beta*y. 
*/ if (*beta != 1.f) { if (*incy == 1) { if (*beta == 0.f) { i__1 = *n; for (i = 1; i <= *n; ++i) { Y(i) = 0.f; /* L10: */ } } else { i__1 = *n; for (i = 1; i <= *n; ++i) { Y(i) = *beta * Y(i); /* L20: */ } } } else { iy = ky; if (*beta == 0.f) { i__1 = *n; for (i = 1; i <= *n; ++i) { Y(iy) = 0.f; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i = 1; i <= *n; ++i) { Y(iy) = *beta * Y(iy); iy += *incy; /* L40: */ } } } } if (*alpha == 0.f) { return 0; } if (strncmp(uplo, "U", 1)==0) { /* Form y when A is stored in upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { temp1 = *alpha * X(j); temp2 = 0.f; i__2 = j - 1; for (i = 1; i <= j-1; ++i) { Y(i) += temp1 * A(i,j); temp2 += A(i,j) * X(i); /* L50: */ } Y(j) = Y(j) + temp1 * A(j,j) + *alpha * temp2; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= *n; ++j) { temp1 = *alpha * X(jx); temp2 = 0.f; ix = kx; iy = ky; i__2 = j - 1; for (i = 1; i <= j-1; ++i) { Y(iy) += temp1 * A(i,j); temp2 += A(i,j) * X(ix); ix += *incx; iy += *incy; /* L70: */ } Y(jy) = Y(jy) + temp1 * A(j,j) + *alpha * temp2; jx += *incx; jy += *incy; /* L80: */ } } } else { /* Form y when A is stored in lower triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { temp1 = *alpha * X(j); temp2 = 0.f; Y(j) += temp1 * A(j,j); i__2 = *n; for (i = j + 1; i <= *n; ++i) { Y(i) += temp1 * A(i,j); temp2 += A(i,j) * X(i); /* L90: */ } Y(j) += *alpha * temp2; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= *n; ++j) { temp1 = *alpha * X(jx); temp2 = 0.f; Y(jy) += temp1 * A(j,j); ix = jx; iy = jy; i__2 = *n; for (i = j + 1; i <= *n; ++i) { ix += *incx; iy += *incy; Y(iy) += temp1 * A(i,j); temp2 += A(i,j) * X(ix); /* L110: */ } Y(jy) += *alpha * temp2; jx += *incx; jy += *incy; /* L120: */ } } } return 0; /* End of SSYMV . 
*/ } /* ssymv_ */ SuperLU_DIST_5.3.0/CBLAS/strsv.c0000644013363400111340000001713213233431301015011 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, integer *n, real *a, integer *lda, real *x, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer info; static real temp; static integer i, j; static integer ix, jx, kx; extern /* Subroutine */ int input_error_dist(char *, integer *); static logical nounit; /* Purpose ======= STRSV solves one of the systems of equations A*x = b, or A'*x = b, where b and x are n element vectors and A is an n by n unit, or non-unit, upper or lower triangular matrix. No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine. Parameters ========== UPLO - CHARACTER*1. On entry, UPLO specifies whether the matrix is an upper or lower triangular matrix as follows: UPLO = 'U' or 'u' A is an upper triangular matrix. UPLO = 'L' or 'l' A is a lower triangular matrix. Unchanged on exit. TRANS - CHARACTER*1. On entry, TRANS specifies the equations to be solved as follows: TRANS = 'N' or 'n' A*x = b. TRANS = 'T' or 't' A'*x = b. TRANS = 'C' or 'c' A'*x = b. Unchanged on exit. DIAG - CHARACTER*1. On entry, DIAG specifies whether or not A is unit triangular as follows: DIAG = 'U' or 'u' A is assumed to be unit triangular. DIAG = 'N' or 'n' A is not assumed to be unit triangular. Unchanged on exit. N - INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. Unchanged on exit. A - REAL array of DIMENSION ( LDA, n ). 
Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG = 'U' or 'u', the diagonal elements of A are not referenced either, but are assumed to be unity. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, n ). Unchanged on exit. X - REAL array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element right-hand side vector b. On exit, X is overwritten with the solution vector x. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (strncmp(uplo, "U", 1)!=0 && strncmp(uplo, "L", 1)!=0) { info = 1; } else if (strncmp(trans, "N", 1)!=0 && strncmp(trans, "T", 1)!=0 && strncmp(trans, "C", 1)!=0) { info = 2; } else if (strncmp(diag, "U", 1)!=0 && strncmp(diag, "N", 1)!=0) { info = 3; } else if (*n < 0) { info = 4; } else if (*lda < max(1,*n)) { info = 6; } else if (*incx == 0) { info = 8; } if (info != 0) { input_error_dist("STRSV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = (strncmp(diag, "N", 1)==0); /* Set up the start point in X if the increment is not unity. 
This will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. */ if (strncmp(trans, "N", 1)==0) { /* Form x := inv( A )*x. */ if (strncmp(uplo, "U", 1)==0) { if (*incx == 1) { for (j = *n; j >= 1; --j) { if (X(j) != 0.f) { if (nounit) { X(j) /= A(j,j); } temp = X(j); for (i = j - 1; i >= 1; --i) { X(i) -= temp * A(i,j); /* L10: */ } } /* L20: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { if (X(jx) != 0.f) { if (nounit) { X(jx) /= A(j,j); } temp = X(jx); ix = jx; for (i = j - 1; i >= 1; --i) { ix -= *incx; X(ix) -= temp * A(i,j); /* L30: */ } } jx -= *incx; /* L40: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { if (X(j) != 0.f) { if (nounit) { X(j) /= A(j,j); } temp = X(j); i__2 = *n; for (i = j + 1; i <= *n; ++i) { X(i) -= temp * A(i,j); /* L50: */ } } /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= *n; ++j) { if (X(jx) != 0.f) { if (nounit) { X(jx) /= A(j,j); } temp = X(jx); ix = jx; i__2 = *n; for (i = j + 1; i <= *n; ++i) { ix += *incx; X(ix) -= temp * A(i,j); /* L70: */ } } jx += *incx; /* L80: */ } } } } else { /* Form x := inv( A' )*x. 
*/ if (strncmp(uplo, "U", 1)==0) { if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { temp = X(j); i__2 = j - 1; for (i = 1; i <= j-1; ++i) { temp -= A(i,j) * X(i); /* L90: */ } if (nounit) { temp /= A(j,j); } X(j) = temp; /* L100: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= *n; ++j) { temp = X(jx); ix = kx; i__2 = j - 1; for (i = 1; i <= j-1; ++i) { temp -= A(i,j) * X(ix); ix += *incx; /* L110: */ } if (nounit) { temp /= A(j,j); } X(jx) = temp; jx += *incx; /* L120: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = X(j); i__1 = j + 1; for (i = *n; i >= j+1; --i) { temp -= A(i,j) * X(i); /* L130: */ } if (nounit) { temp /= A(j,j); } X(j) = temp; /* L140: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = X(jx); ix = kx; i__1 = j + 1; for (i = *n; i >= j+1; --i) { temp -= A(i,j) * X(ix); ix -= *incx; /* L150: */ } if (nounit) { temp /= A(j,j); } X(jx) = temp; jx -= *incx; /* L160: */ } } } } return 0; /* End of STRSV . */ } /* strsv_ */ SuperLU_DIST_5.3.0/CBLAS/ddot.c0000644013363400111340000000346013233431301014561 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy) { /* System generated locals */ integer i__1; doublereal ret_val; /* Local variables */ static integer i, m; static doublereal dtemp; static integer ix, iy, mp1; /* forms the dot product of two vectors. uses unrolled loops for increments equal to one. jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define DY(I) dy[(I)-1] #define DX(I) dx[(I)-1] ret_val = 0.; dtemp = 0.; if (*n <= 0) { return ret_val; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i = 1; i <= *n; ++i) { dtemp += DX(ix) * DY(iy); ix += *incx; iy += *incy; /* L10: */ } ret_val = dtemp; return ret_val; /* code for both increments equal to 1 clean-up loop */ L20: m = *n % 5; if (m == 0) { goto L40; } i__1 = m; for (i = 1; i <= m; ++i) { dtemp += DX(i) * DY(i); /* L30: */ } if (*n < 5) { goto L60; } L40: mp1 = m + 1; i__1 = *n; for (i = mp1; i <= *n; i += 5) { dtemp = dtemp + DX(i) * DY(i) + DX(i + 1) * DY(i + 1) + DX(i + 2) * DY(i + 2) + DX(i + 3) * DY(i + 3) + DX(i + 4) * DY(i + 4); /* L50: */ } L60: ret_val = dtemp; return ret_val; } /* ddot_ */ SuperLU_DIST_5.3.0/CBLAS/dger.c0000644013363400111340000001041313233431301014544 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer info; static doublereal temp; static integer i, j, ix, jy, kx; extern /* Subroutine */ int input_error_dist(char *, integer *); /* Purpose ======= DGER performs the rank 1 operation A := alpha*x*y' + A, where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. Parameters ========== M - INTEGER. On entry, M specifies the number of rows of the matrix A. M must be at least zero. Unchanged on exit. N - INTEGER. 
On entry, N specifies the number of columns of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. Unchanged on exit. X - DOUBLE PRECISION array of dimension at least ( 1 + ( m - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the m element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Y - DOUBLE PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. Unchanged on exit. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, m ). Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (*m < 0) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*m)) { info = 9; } if (info != 0) { input_error_dist("DGER ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0.) { return 0; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. 
*/ if (*incy > 0) { jy = 1; } else { jy = 1 - (*n - 1) * *incy; } if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { if (Y(jy) != 0.) { temp = *alpha * Y(jy); i__2 = *m; for (i = 1; i <= *m; ++i) { A(i,j) += X(i) * temp; /* L10: */ } } jy += *incy; /* L20: */ } } else { if (*incx > 0) { kx = 1; } else { kx = 1 - (*m - 1) * *incx; } i__1 = *n; for (j = 1; j <= *n; ++j) { if (Y(jy) != 0.) { temp = *alpha * Y(jy); ix = kx; i__2 = *m; for (i = 1; i <= *m; ++i) { A(i,j) += X(ix) * temp; ix += *incx; /* L30: */ } } jy += *incy; /* L40: */ } } return 0; /* End of DGER . */ } /* dger_ */ SuperLU_DIST_5.3.0/CBLAS/ztrsm.c0000644013363400111340000004355413233431301015016 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Table of constant values */ static doublecomplex c_b1 = {1.,0.}; /* Subroutine */ int ztrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1, z__2, z__3; /* Builtin functions */ void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg( doublecomplex *, doublecomplex *); /* Local variables */ static integer info; static doublecomplex temp; static integer i, j, k; static logical lside; static integer nrowa; static logical upper; extern /* Subroutine */ int input_error_dist(char *, integer *); static logical noconj, nounit; /* Purpose ======= ZTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). The matrix X is overwritten on B. 
Parameters ========== SIDE - CHARACTER*1. On entry, SIDE specifies whether op( A ) appears on the left or right of X as follows: SIDE = 'L' or 'l' op( A )*X = alpha*B. SIDE = 'R' or 'r' X*op( A ) = alpha*B. Unchanged on exit. UPLO - CHARACTER*1. On entry, UPLO specifies whether the matrix A is an upper or lower triangular matrix as follows: UPLO = 'U' or 'u' A is an upper triangular matrix. UPLO = 'L' or 'l' A is a lower triangular matrix. Unchanged on exit. TRANSA - CHARACTER*1. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows: TRANSA = 'N' or 'n' op( A ) = A. TRANSA = 'T' or 't' op( A ) = A'. TRANSA = 'C' or 'c' op( A ) = conjg( A' ). Unchanged on exit. DIAG - CHARACTER*1. On entry, DIAG specifies whether or not A is unit triangular as follows: DIAG = 'U' or 'u' A is assumed to be unit triangular. DIAG = 'N' or 'n' A is not assumed to be unit triangular. Unchanged on exit. M - INTEGER. On entry, M specifies the number of rows of B. M must be at least zero. Unchanged on exit. N - INTEGER. On entry, N specifies the number of columns of B. N must be at least zero. Unchanged on exit. ALPHA - COMPLEX*16 . On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. Unchanged on exit. A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. Before entry with UPLO = 'U' or 'u', the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = 'L' or 'l', the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG = 'U' or 'u', the diagonal elements of A are not referenced either, but are assumed to be unity. Unchanged on exit. LDA - INTEGER. 
On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When SIDE = 'L' or 'l' then LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' then LDA must be at least max( 1, n ). Unchanged on exit. B - COMPLEX*16 array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B, and on exit is overwritten by the solution matrix X. LDB - INTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, m ). Unchanged on exit. Level 3 Blas routine. -- Written on 8-February-1989. Jack Dongarra, Argonne National Laboratory. Iain Duff, AERE Harwell. Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. Test the input parameters. Parameter adjustments Function Body */ #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] lside = (strncmp(side, "L", 1)==0); if (lside) { nrowa = *m; } else { nrowa = *n; } noconj = (strncmp(transa, "T", 1)==0); nounit = (strncmp(diag, "N", 1)==0); upper = (strncmp(uplo, "U", 1)==0); info = 0; if (! lside && strncmp(side, "R", 1)!=0) { info = 1; } else if (! upper && strncmp(uplo, "L", 1)!=0) { info = 2; } else if (strncmp(transa, "N", 1)!=0 && strncmp(transa, "T", 1)!=0 && strncmp(transa, "C", 1)!=0) { info = 3; } else if (strncmp(diag, "U", 1)!=0 && strncmp(diag, "N", 1)!=0) { info = 4; } else if (*m < 0) { info = 5; } else if (*n < 0) { info = 6; } else if (*lda < max(1,nrowa)) { info = 9; } else if (*ldb < max(1,*m)) { info = 11; } if (info != 0) { input_error_dist("ZTRSM ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } /* And when alpha.eq.zero. */ if (alpha->r == 0. && alpha->i == 0.) 
{ i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * b_dim1; B(i,j).r = 0., B(i,j).i = 0.; /* L10: */ } /* L20: */ } return 0; } /* Start the operations. */ if (lside) { if (strncmp(transa, "N", 1)==0) { /* Form B := alpha*inv( A )*B. */ if (upper) { i__1 = *n; for (j = 1; j <= *n; ++j) { if (alpha->r != 1. || alpha->i != 0.) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * b_dim1; i__4 = i + j * b_dim1; z__1.r = alpha->r * B(i,j).r - alpha->i * B(i,j) .i, z__1.i = alpha->r * B(i,j).i + alpha->i * B(i,j).r; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L30: */ } } for (k = *m; k >= 1; --k) { i__2 = k + j * b_dim1; if (B(k,j).r != 0. || B(k,j).i != 0.) { if (nounit) { i__2 = k + j * b_dim1; z_div(&z__1, &B(k,j), &A(k,k)); B(k,j).r = z__1.r, B(k,j).i = z__1.i; } i__2 = k - 1; for (i = 1; i <= k-1; ++i) { i__3 = i + j * b_dim1; i__4 = i + j * b_dim1; i__5 = k + j * b_dim1; i__6 = i + k * a_dim1; z__2.r = B(k,j).r * A(i,k).r - B(k,j).i * A(i,k).i, z__2.i = B(k,j).r * A(i,k).i + B(k,j).i * A(i,k).r; z__1.r = B(i,j).r - z__2.r, z__1.i = B(i,j) .i - z__2.i; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L40: */ } } /* L50: */ } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= *n; ++j) { if (alpha->r != 1. || alpha->i != 0.) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * b_dim1; i__4 = i + j * b_dim1; z__1.r = alpha->r * B(i,j).r - alpha->i * B(i,j) .i, z__1.i = alpha->r * B(i,j).i + alpha->i * B(i,j).r; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L70: */ } } i__2 = *m; for (k = 1; k <= *m; ++k) { i__3 = k + j * b_dim1; if (B(k,j).r != 0. || B(k,j).i != 0.) 
{ if (nounit) { i__3 = k + j * b_dim1; z_div(&z__1, &B(k,j), &A(k,k)); B(k,j).r = z__1.r, B(k,j).i = z__1.i; } i__3 = *m; for (i = k + 1; i <= *m; ++i) { i__4 = i + j * b_dim1; i__5 = i + j * b_dim1; i__6 = k + j * b_dim1; i__7 = i + k * a_dim1; z__2.r = B(k,j).r * A(i,k).r - B(k,j).i * A(i,k).i, z__2.i = B(k,j).r * A(i,k).i + B(k,j).i * A(i,k).r; z__1.r = B(i,j).r - z__2.r, z__1.i = B(i,j) .i - z__2.i; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L80: */ } } /* L90: */ } /* L100: */ } } } else { /* Form B := alpha*inv( A' )*B or B := alpha*inv( conjg( A' ) )*B. */ if (upper) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * b_dim1; z__1.r = alpha->r * B(i,j).r - alpha->i * B(i,j).i, z__1.i = alpha->r * B(i,j).i + alpha->i * B(i,j).r; temp.r = z__1.r, temp.i = z__1.i; if (noconj) { i__3 = i - 1; for (k = 1; k <= i-1; ++k) { i__4 = k + i * a_dim1; i__5 = k + j * b_dim1; z__2.r = A(k,i).r * B(k,j).r - A(k,i).i * B(k,j).i, z__2.i = A(k,i).r * B(k,j).i + A(k,i).i * B(k,j).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; /* L110: */ } if (nounit) { z_div(&z__1, &temp, &A(i,i)); temp.r = z__1.r, temp.i = z__1.i; } } else { i__3 = i - 1; for (k = 1; k <= i-1; ++k) { d_cnjg(&z__3, &A(k,i)); i__4 = k + j * b_dim1; z__2.r = z__3.r * B(k,j).r - z__3.i * B(k,j) .i, z__2.i = z__3.r * B(k,j).i + z__3.i * B(k,j).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; /* L120: */ } if (nounit) { d_cnjg(&z__2, &A(i,i)); z_div(&z__1, &temp, &z__2); temp.r = z__1.r, temp.i = z__1.i; } } i__3 = i + j * b_dim1; B(i,j).r = temp.r, B(i,j).i = temp.i; /* L130: */ } /* L140: */ } } else { i__1 = *n; for (j = 1; j <= *n; ++j) { for (i = *m; i >= 1; --i) { i__2 = i + j * b_dim1; z__1.r = alpha->r * B(i,j).r - alpha->i * B(i,j).i, z__1.i = alpha->r * B(i,j).i + alpha->i * B(i,j).r; temp.r = z__1.r, temp.i = z__1.i; if (noconj) { i__2 = *m; for (k = i + 1; k <= *m; ++k) { i__3 = k 
+ i * a_dim1; i__4 = k + j * b_dim1; z__2.r = A(k,i).r * B(k,j).r - A(k,i).i * B(k,j).i, z__2.i = A(k,i).r * B(k,j).i + A(k,i).i * B(k,j).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; /* L150: */ } if (nounit) { z_div(&z__1, &temp, &A(i,i)); temp.r = z__1.r, temp.i = z__1.i; } } else { i__2 = *m; for (k = i + 1; k <= *m; ++k) { d_cnjg(&z__3, &A(k,i)); i__3 = k + j * b_dim1; z__2.r = z__3.r * B(k,j).r - z__3.i * B(k,j) .i, z__2.i = z__3.r * B(k,j).i + z__3.i * B(k,j).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; /* L160: */ } if (nounit) { d_cnjg(&z__2, &A(i,i)); z_div(&z__1, &temp, &z__2); temp.r = z__1.r, temp.i = z__1.i; } } i__2 = i + j * b_dim1; B(i,j).r = temp.r, B(i,j).i = temp.i; /* L170: */ } /* L180: */ } } } } else { if (strncmp(transa, "N", 1)==0) { /* Form B := alpha*B*inv( A ). */ if (upper) { i__1 = *n; for (j = 1; j <= *n; ++j) { if (alpha->r != 1. || alpha->i != 0.) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * b_dim1; i__4 = i + j * b_dim1; z__1.r = alpha->r * B(i,j).r - alpha->i * B(i,j) .i, z__1.i = alpha->r * B(i,j).i + alpha->i * B(i,j).r; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L190: */ } } i__2 = j - 1; for (k = 1; k <= j-1; ++k) { i__3 = k + j * a_dim1; if (A(k,j).r != 0. || A(k,j).i != 0.) 
{ i__3 = *m; for (i = 1; i <= *m; ++i) { i__4 = i + j * b_dim1; i__5 = i + j * b_dim1; i__6 = k + j * a_dim1; i__7 = i + k * b_dim1; z__2.r = A(k,j).r * B(i,k).r - A(k,j).i * B(i,k).i, z__2.i = A(k,j).r * B(i,k).i + A(k,j).i * B(i,k).r; z__1.r = B(i,j).r - z__2.r, z__1.i = B(i,j) .i - z__2.i; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L200: */ } } /* L210: */ } if (nounit) { z_div(&z__1, &c_b1, &A(j,j)); temp.r = z__1.r, temp.i = z__1.i; i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * b_dim1; i__4 = i + j * b_dim1; z__1.r = temp.r * B(i,j).r - temp.i * B(i,j).i, z__1.i = temp.r * B(i,j).i + temp.i * B(i,j).r; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L220: */ } } /* L230: */ } } else { for (j = *n; j >= 1; --j) { if (alpha->r != 1. || alpha->i != 0.) { i__1 = *m; for (i = 1; i <= *m; ++i) { i__2 = i + j * b_dim1; i__3 = i + j * b_dim1; z__1.r = alpha->r * B(i,j).r - alpha->i * B(i,j) .i, z__1.i = alpha->r * B(i,j).i + alpha->i * B(i,j).r; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L240: */ } } i__1 = *n; for (k = j + 1; k <= *n; ++k) { i__2 = k + j * a_dim1; if (A(k,j).r != 0. || A(k,j).i != 0.) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * b_dim1; i__4 = i + j * b_dim1; i__5 = k + j * a_dim1; i__6 = i + k * b_dim1; z__2.r = A(k,j).r * B(i,k).r - A(k,j).i * B(i,k).i, z__2.i = A(k,j).r * B(i,k).i + A(k,j).i * B(i,k).r; z__1.r = B(i,j).r - z__2.r, z__1.i = B(i,j) .i - z__2.i; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L250: */ } } /* L260: */ } if (nounit) { z_div(&z__1, &c_b1, &A(j,j)); temp.r = z__1.r, temp.i = z__1.i; i__1 = *m; for (i = 1; i <= *m; ++i) { i__2 = i + j * b_dim1; i__3 = i + j * b_dim1; z__1.r = temp.r * B(i,j).r - temp.i * B(i,j).i, z__1.i = temp.r * B(i,j).i + temp.i * B(i,j).r; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L270: */ } } /* L280: */ } } } else { /* Form B := alpha*B*inv( A' ) or B := alpha*B*inv( conjg( A' ) ). 
*/ if (upper) { for (k = *n; k >= 1; --k) { if (nounit) { if (noconj) { z_div(&z__1, &c_b1, &A(k,k)); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &A(k,k)); z_div(&z__1, &c_b1, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__1 = *m; for (i = 1; i <= *m; ++i) { i__2 = i + k * b_dim1; i__3 = i + k * b_dim1; z__1.r = temp.r * B(i,k).r - temp.i * B(i,k).i, z__1.i = temp.r * B(i,k).i + temp.i * B(i,k).r; B(i,k).r = z__1.r, B(i,k).i = z__1.i; /* L290: */ } } i__1 = k - 1; for (j = 1; j <= k-1; ++j) { i__2 = j + k * a_dim1; if (A(j,k).r != 0. || A(j,k).i != 0.) { if (noconj) { i__2 = j + k * a_dim1; temp.r = A(j,k).r, temp.i = A(j,k).i; } else { d_cnjg(&z__1, &A(j,k)); temp.r = z__1.r, temp.i = z__1.i; } i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * b_dim1; i__4 = i + j * b_dim1; i__5 = i + k * b_dim1; z__2.r = temp.r * B(i,k).r - temp.i * B(i,k) .i, z__2.i = temp.r * B(i,k).i + temp.i * B(i,k).r; z__1.r = B(i,j).r - z__2.r, z__1.i = B(i,j) .i - z__2.i; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L300: */ } } /* L310: */ } if (alpha->r != 1. || alpha->i != 0.) { i__1 = *m; for (i = 1; i <= *m; ++i) { i__2 = i + k * b_dim1; i__3 = i + k * b_dim1; z__1.r = alpha->r * B(i,k).r - alpha->i * B(i,k) .i, z__1.i = alpha->r * B(i,k).i + alpha->i * B(i,k).r; B(i,k).r = z__1.r, B(i,k).i = z__1.i; /* L320: */ } } /* L330: */ } } else { i__1 = *n; for (k = 1; k <= *n; ++k) { if (nounit) { if (noconj) { z_div(&z__1, &c_b1, &A(k,k)); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &A(k,k)); z_div(&z__1, &c_b1, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + k * b_dim1; i__4 = i + k * b_dim1; z__1.r = temp.r * B(i,k).r - temp.i * B(i,k).i, z__1.i = temp.r * B(i,k).i + temp.i * B(i,k).r; B(i,k).r = z__1.r, B(i,k).i = z__1.i; /* L340: */ } } i__2 = *n; for (j = k + 1; j <= *n; ++j) { i__3 = j + k * a_dim1; if (A(j,k).r != 0. || A(j,k).i != 0.) 
{ if (noconj) { i__3 = j + k * a_dim1; temp.r = A(j,k).r, temp.i = A(j,k).i; } else { d_cnjg(&z__1, &A(j,k)); temp.r = z__1.r, temp.i = z__1.i; } i__3 = *m; for (i = 1; i <= *m; ++i) { i__4 = i + j * b_dim1; i__5 = i + j * b_dim1; i__6 = i + k * b_dim1; z__2.r = temp.r * B(i,k).r - temp.i * B(i,k) .i, z__2.i = temp.r * B(i,k).i + temp.i * B(i,k).r; z__1.r = B(i,j).r - z__2.r, z__1.i = B(i,j) .i - z__2.i; B(i,j).r = z__1.r, B(i,j).i = z__1.i; /* L350: */ } } /* L360: */ } if (alpha->r != 1. || alpha->i != 0.) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + k * b_dim1; i__4 = i + k * b_dim1; z__1.r = alpha->r * B(i,k).r - alpha->i * B(i,k) .i, z__1.i = alpha->r * B(i,k).i + alpha->i * B(i,k).r; B(i,k).r = z__1.r, B(i,k).i = z__1.i; /* L370: */ } } /* L380: */ } } } } return 0; /* End of ZTRSM . */ } /* ztrsm_ */ SuperLU_DIST_5.3.0/CBLAS/ztrsv.c0000644013363400111340000003175713233431301015031 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Subroutine */ int ztrsv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; doublecomplex z__1, z__2, z__3; /* Builtin functions */ void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg( doublecomplex *, doublecomplex *); /* Local variables */ static integer info; static doublecomplex temp; static integer i, j; static integer ix, jx, kx; extern /* Subroutine */ int input_error_dist(char *, integer *); static logical noconj, nounit; /* Purpose ======= ZTRSV solves one of the systems of equations A*x = b, or A'*x = b, or conjg( A' )*x = b, where b and x are n element vectors and A is an n by n unit, or non-unit, upper or lower triangular matrix. No test for singularity or near-singularity is included in this routine. 
Such tests must be performed before calling this routine. Parameters ========== UPLO - CHARACTER*1. On entry, UPLO specifies whether the matrix is an upper or lower triangular matrix as follows: UPLO = 'U' or 'u' A is an upper triangular matrix. UPLO = 'L' or 'l' A is a lower triangular matrix. Unchanged on exit. TRANS - CHARACTER*1. On entry, TRANS specifies the equations to be solved as follows: TRANS = 'N' or 'n' A*x = b. TRANS = 'T' or 't' A'*x = b. TRANS = 'C' or 'c' conjg( A' )*x = b. Unchanged on exit. DIAG - CHARACTER*1. On entry, DIAG specifies whether or not A is unit triangular as follows: DIAG = 'U' or 'u' A is assumed to be unit triangular. DIAG = 'N' or 'n' A is not assumed to be unit triangular. Unchanged on exit. N - INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. Unchanged on exit. A - COMPLEX*16 array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG = 'U' or 'u', the diagonal elements of A are not referenced either, but are assumed to be unity. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, n ). Unchanged on exit. X - COMPLEX*16 array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element right-hand side vector b. On exit, X is overwritten with the solution vector x. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. 
Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (strncmp(uplo, "U", 1)!=0 && strncmp(uplo, "L", 1)!=0) { info = 1; } else if (strncmp(trans, "N", 1)!=0 && strncmp(trans, "T", 1)!=0 && strncmp(trans, "C", 1)!=0) { info = 2; } else if (strncmp(diag, "U", 1)!=0 && strncmp(diag, "N", 1)!=0) { info = 3; } else if (*n < 0) { info = 4; } else if (*lda < max(1,*n)) { info = 6; } else if (*incx == 0) { info = 8; } if (info != 0) { input_error_dist("ZTRSV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } noconj = (strncmp(trans, "T", 1)==0); nounit = (strncmp(diag, "N", 1)==0); /* Set up the start point in X if the increment is not unity. This will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. */ if (strncmp(trans, "N", 1)==0) { /* Form x := inv( A )*x. */ if (strncmp(uplo, "U", 1)==0) { if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; if (X(j).r != 0. || X(j).i != 0.) { if (nounit) { i__1 = j; z_div(&z__1, &X(j), &A(j,j)); X(j).r = z__1.r, X(j).i = z__1.i; } i__1 = j; temp.r = X(j).r, temp.i = X(j).i; for (i = j - 1; i >= 1; --i) { i__1 = i; i__2 = i; i__3 = i + j * a_dim1; z__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, z__2.i = temp.r * A(i,j).i + temp.i * A(i,j).r; z__1.r = X(i).r - z__2.r, z__1.i = X(i).i - z__2.i; X(i).r = z__1.r, X(i).i = z__1.i; /* L10: */ } } /* L20: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { i__1 = jx; if (X(jx).r != 0. || X(jx).i != 0.) 
{ if (nounit) { i__1 = jx; z_div(&z__1, &X(jx), &A(j,j)); X(jx).r = z__1.r, X(jx).i = z__1.i; } i__1 = jx; temp.r = X(jx).r, temp.i = X(jx).i; ix = jx; for (i = j - 1; i >= 1; --i) { ix -= *incx; i__1 = ix; i__2 = ix; i__3 = i + j * a_dim1; z__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, z__2.i = temp.r * A(i,j).i + temp.i * A(i,j).r; z__1.r = X(ix).r - z__2.r, z__1.i = X(ix).i - z__2.i; X(ix).r = z__1.r, X(ix).i = z__1.i; /* L30: */ } } jx -= *incx; /* L40: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = j; if (X(j).r != 0. || X(j).i != 0.) { if (nounit) { i__2 = j; z_div(&z__1, &X(j), &A(j,j)); X(j).r = z__1.r, X(j).i = z__1.i; } i__2 = j; temp.r = X(j).r, temp.i = X(j).i; i__2 = *n; for (i = j + 1; i <= *n; ++i) { i__3 = i; i__4 = i; i__5 = i + j * a_dim1; z__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, z__2.i = temp.r * A(i,j).i + temp.i * A(i,j).r; z__1.r = X(i).r - z__2.r, z__1.i = X(i).i - z__2.i; X(i).r = z__1.r, X(i).i = z__1.i; /* L50: */ } } /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jx; if (X(jx).r != 0. || X(jx).i != 0.) { if (nounit) { i__2 = jx; z_div(&z__1, &X(jx), &A(j,j)); X(jx).r = z__1.r, X(jx).i = z__1.i; } i__2 = jx; temp.r = X(jx).r, temp.i = X(jx).i; ix = jx; i__2 = *n; for (i = j + 1; i <= *n; ++i) { ix += *incx; i__3 = ix; i__4 = ix; i__5 = i + j * a_dim1; z__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, z__2.i = temp.r * A(i,j).i + temp.i * A(i,j).r; z__1.r = X(ix).r - z__2.r, z__1.i = X(ix).i - z__2.i; X(ix).r = z__1.r, X(ix).i = z__1.i; /* L70: */ } } jx += *incx; /* L80: */ } } } } else { /* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. 
*/ if (strncmp(uplo, "U", 1)==0) { if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = j; temp.r = X(j).r, temp.i = X(j).i; if (noconj) { i__2 = j - 1; for (i = 1; i <= j-1; ++i) { i__3 = i + j * a_dim1; i__4 = i; z__2.r = A(i,j).r * X(i).r - A(i,j).i * X( i).i, z__2.i = A(i,j).r * X(i).i + A(i,j).i * X(i).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; /* L90: */ } if (nounit) { z_div(&z__1, &temp, &A(j,j)); temp.r = z__1.r, temp.i = z__1.i; } } else { i__2 = j - 1; for (i = 1; i <= j-1; ++i) { d_cnjg(&z__3, &A(i,j)); i__3 = i; z__2.r = z__3.r * X(i).r - z__3.i * X(i).i, z__2.i = z__3.r * X(i).i + z__3.i * X( i).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; /* L100: */ } if (nounit) { d_cnjg(&z__2, &A(j,j)); z_div(&z__1, &temp, &z__2); temp.r = z__1.r, temp.i = z__1.i; } } i__2 = j; X(j).r = temp.r, X(j).i = temp.i; /* L110: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= *n; ++j) { ix = kx; i__2 = jx; temp.r = X(jx).r, temp.i = X(jx).i; if (noconj) { i__2 = j - 1; for (i = 1; i <= j-1; ++i) { i__3 = i + j * a_dim1; i__4 = ix; z__2.r = A(i,j).r * X(ix).r - A(i,j).i * X( ix).i, z__2.i = A(i,j).r * X(ix).i + A(i,j).i * X(ix).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; ix += *incx; /* L120: */ } if (nounit) { z_div(&z__1, &temp, &A(j,j)); temp.r = z__1.r, temp.i = z__1.i; } } else { i__2 = j - 1; for (i = 1; i <= j-1; ++i) { d_cnjg(&z__3, &A(i,j)); i__3 = ix; z__2.r = z__3.r * X(ix).r - z__3.i * X(ix).i, z__2.i = z__3.r * X(ix).i + z__3.i * X( ix).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; ix += *incx; /* L130: */ } if (nounit) { d_cnjg(&z__2, &A(j,j)); z_div(&z__1, &temp, &z__2); temp.r = z__1.r, temp.i = z__1.i; } } i__2 = jx; X(jx).r = temp.r, X(jx).i = temp.i; jx += *incx; /* L140: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; temp.r = X(j).r, temp.i 
= X(j).i; if (noconj) { i__1 = j + 1; for (i = *n; i >= j+1; --i) { i__2 = i + j * a_dim1; i__3 = i; z__2.r = A(i,j).r * X(i).r - A(i,j).i * X( i).i, z__2.i = A(i,j).r * X(i).i + A(i,j).i * X(i).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; /* L150: */ } if (nounit) { z_div(&z__1, &temp, &A(j,j)); temp.r = z__1.r, temp.i = z__1.i; } } else { i__1 = j + 1; for (i = *n; i >= j+1; --i) { d_cnjg(&z__3, &A(i,j)); i__2 = i; z__2.r = z__3.r * X(i).r - z__3.i * X(i).i, z__2.i = z__3.r * X(i).i + z__3.i * X( i).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; /* L160: */ } if (nounit) { d_cnjg(&z__2, &A(j,j)); z_div(&z__1, &temp, &z__2); temp.r = z__1.r, temp.i = z__1.i; } } i__1 = j; X(j).r = temp.r, X(j).i = temp.i; /* L170: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { ix = kx; i__1 = jx; temp.r = X(jx).r, temp.i = X(jx).i; if (noconj) { i__1 = j + 1; for (i = *n; i >= j+1; --i) { i__2 = i + j * a_dim1; i__3 = ix; z__2.r = A(i,j).r * X(ix).r - A(i,j).i * X( ix).i, z__2.i = A(i,j).r * X(ix).i + A(i,j).i * X(ix).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; ix -= *incx; /* L180: */ } if (nounit) { z_div(&z__1, &temp, &A(j,j)); temp.r = z__1.r, temp.i = z__1.i; } } else { i__1 = j + 1; for (i = *n; i >= j+1; --i) { d_cnjg(&z__3, &A(i,j)); i__2 = ix; z__2.r = z__3.r * X(ix).r - z__3.i * X(ix).i, z__2.i = z__3.r * X(ix).i + z__3.i * X( ix).r; z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; temp.r = z__1.r, temp.i = z__1.i; ix -= *incx; /* L190: */ } if (nounit) { d_cnjg(&z__2, &A(j,j)); z_div(&z__1, &temp, &z__2); temp.r = z__1.r, temp.i = z__1.i; } } i__1 = jx; X(jx).r = temp.r, X(jx).i = temp.i; jx -= *incx; /* L200: */ } } } } return 0; /* End of ZTRSV . */ } /* ztrsv_ */ SuperLU_DIST_5.3.0/CBLAS/drot.c0000644013363400111340000000266613233431301014606 0ustar xiaoyessg /* -- translated by f2c (version 19940927). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, doublereal *c, doublereal *s) { /* System generated locals */ integer i__1; /* Local variables */ static integer i; static doublereal dtemp; static integer ix, iy; /* applies a plane rotation. jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define DY(I) dy[(I)-1] #define DX(I) dx[(I)-1] if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i = 1; i <= *n; ++i) { dtemp = *c * DX(ix) + *s * DY(iy); DY(iy) = *c * DY(iy) - *s * DX(ix); DX(ix) = dtemp; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i = 1; i <= *n; ++i) { dtemp = *c * DX(i) + *s * DY(i); DY(i) = *c * DY(i) - *s * DX(i); DX(i) = dtemp; /* L30: */ } return 0; } /* drot_ */ SuperLU_DIST_5.3.0/CBLAS/isamax.c0000644013363400111340000000265413233431301015115 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" integer isamax_(integer *n, real *sx, integer *incx) { /* System generated locals */ integer ret_val, i__1; real r__1; /* Local variables */ static real smax; static integer i, ix; /* finds the index of element having max. absolute value. jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. 
modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define SX(I) sx[(I)-1] ret_val = 0; if (*n < 1 || *incx <= 0) { return ret_val; } ret_val = 1; if (*n == 1) { return ret_val; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ ix = 1; smax = dabs(SX(1)); ix += *incx; i__1 = *n; for (i = 2; i <= *n; ++i) { if ((r__1 = SX(ix), dabs(r__1)) <= smax) { goto L5; } ret_val = i; smax = (r__1 = SX(ix), dabs(r__1)); L5: ix += *incx; /* L10: */ } return ret_val; /* code for increment equal to 1 */ L20: smax = dabs(SX(1)); i__1 = *n; for (i = 2; i <= *n; ++i) { if ((r__1 = SX(i), dabs(r__1)) <= smax) { goto L30; } ret_val = i; smax = (r__1 = SX(i), dabs(r__1)); L30: ; } return ret_val; } /* isamax_ */ SuperLU_DIST_5.3.0/CBLAS/superlu_f2c.h0000644013363400111340000000175513233431301016072 0ustar xiaoyessg/* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #include "Cnames.h" #ifndef F2C_INCLUDE #define F2C_INCLUDE typedef int integer; typedef int logical; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef short int shortlogical; typedef char logical1; typedef char integer1; /* typedef long long longint; */ /* system-dependent */ #define TRUE_ (1) #define FALSE_ (0) /* Extern is for use with -E */ #ifndef Extern #define Extern extern #endif #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (doublereal)abs(x) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? 
(a) : (b)) #define dmin(a,b) (doublereal)min(a,b) #define dmax(a,b) (doublereal)max(a,b) #define VOID void #endif SuperLU_DIST_5.3.0/CBLAS/izamax.c0000644013363400111340000000270213233431301015116 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" integer izamax_(integer *n, doublecomplex *zx, integer *incx) { /* System generated locals */ integer ret_val, i__1; /* Local variables */ static doublereal smax; static integer i; extern doublereal dcabs1_(doublecomplex *); static integer ix; /* finds the index of element having max. absolute value. jack dongarra, 1/15/85. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define ZX(I) zx[(I)-1] ret_val = 0; if (*n < 1 || *incx <= 0) { return ret_val; } ret_val = 1; if (*n == 1) { return ret_val; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ ix = 1; smax = dcabs1_(&ZX(1)); ix += *incx; i__1 = *n; for (i = 2; i <= *n; ++i) { if (dcabs1_(&ZX(ix)) <= smax) { goto L5; } ret_val = i; smax = dcabs1_(&ZX(ix)); L5: ix += *incx; /* L10: */ } return ret_val; /* code for increment equal to 1 */ L20: smax = dcabs1_(&ZX(1)); i__1 = *n; for (i = 2; i <= *n; ++i) { if (dcabs1_(&ZX(i)) <= smax) { goto L30; } ret_val = i; smax = dcabs1_(&ZX(i)); L30: ; } return ret_val; } /* izamax_ */ SuperLU_DIST_5.3.0/CBLAS/dzasum.c0000644013363400111340000000237713233431301015140 0ustar xiaoyessg /* -- translated by f2c (version 19940927). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) { /* System generated locals */ integer i__1; doublereal ret_val; /* Local variables */ static integer i; static doublereal stemp; extern doublereal dcabs1_(doublecomplex *); static integer ix; /* takes the sum of the absolute values. jack dongarra, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define ZX(I) zx[(I)-1] ret_val = 0.; stemp = 0.; if (*n <= 0 || *incx <= 0) { return ret_val; } if (*incx == 1) { goto L20; } /* code for increment not equal to 1 */ ix = 1; i__1 = *n; for (i = 1; i <= *n; ++i) { stemp += dcabs1_(&ZX(ix)); ix += *incx; /* L10: */ } ret_val = stemp; return ret_val; /* code for increment equal to 1 */ L20: i__1 = *n; for (i = 1; i <= *n; ++i) { stemp += dcabs1_(&ZX(i)); /* L30: */ } ret_val = stemp; return ret_val; } /* dzasum_ */ SuperLU_DIST_5.3.0/CBLAS/sdot.c0000644013363400111340000000342213233431301014576 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" real sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) { /* System generated locals */ integer i__1; real ret_val; /* Local variables */ static integer i, m; static real stemp; static integer ix, iy, mp1; /* forms the dot product of two vectors. uses unrolled loops for increments equal to one. jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define SY(I) sy[(I)-1] #define SX(I) sx[(I)-1] stemp = 0.f; ret_val = 0.f; if (*n <= 0) { return ret_val; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i = 1; i <= *n; ++i) { stemp += SX(ix) * SY(iy); ix += *incx; iy += *incy; /* L10: */ } ret_val = stemp; return ret_val; /* code for both increments equal to 1 clean-up loop */ L20: m = *n % 5; if (m == 0) { goto L40; } i__1 = m; for (i = 1; i <= m; ++i) { stemp += SX(i) * SY(i); /* L30: */ } if (*n < 5) { goto L60; } L40: mp1 = m + 1; i__1 = *n; for (i = mp1; i <= *n; i += 5) { stemp = stemp + SX(i) * SY(i) + SX(i + 1) * SY(i + 1) + SX(i + 2) * SY(i + 2) + SX(i + 3) * SY(i + 3) + SX(i + 4) * SY(i + 4); /* L50: */ } L60: ret_val = stemp; return ret_val; } /* sdot_ */ SuperLU_DIST_5.3.0/CBLAS/sger.c0000644013363400111340000001035613233431301014571 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, integer *incx, real *y, integer *incy, real *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer info; static real temp; static integer i, j, ix, jy, kx; extern /* Subroutine */ int input_error_dist(char *, integer *); /* Purpose ======= SGER performs the rank 1 operation A := alpha*x*y' + A, where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. Parameters ========== M - INTEGER. On entry, M specifies the number of rows of the matrix A. M must be at least zero. Unchanged on exit. N - INTEGER. 
On entry, N specifies the number of columns of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - REAL . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. X - REAL array of dimension at least ( 1 + ( m - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the m element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Y - REAL array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. Unchanged on exit. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. A - REAL array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, m ). Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (*m < 0) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*m)) { info = 9; } if (info != 0) { input_error_dist("SGER ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || *alpha == 0.f) { return 0; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. 
*/ if (*incy > 0) { jy = 1; } else { jy = 1 - (*n - 1) * *incy; } if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { if (Y(jy) != 0.f) { temp = *alpha * Y(jy); i__2 = *m; for (i = 1; i <= *m; ++i) { A(i,j) += X(i) * temp; /* L10: */ } } jy += *incy; /* L20: */ } } else { if (*incx > 0) { kx = 1; } else { kx = 1 - (*m - 1) * *incx; } i__1 = *n; for (j = 1; j <= *n; ++j) { if (Y(jy) != 0.f) { temp = *alpha * Y(jy); ix = kx; i__2 = *m; for (i = 1; i <= *m; ++i) { A(i,j) += X(ix) * temp; ix += *incx; /* L30: */ } } jy += *incy; /* L40: */ } } return 0; /* End of SGER . */ } /* sger_ */ SuperLU_DIST_5.3.0/CBLAS/srot.c0000644013363400111340000000263013233431301014614 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *c, real *s) { /* System generated locals */ integer i__1; /* Local variables */ static integer i; static real stemp; static integer ix, iy; /* applies a plane rotation. jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define SY(I) sy[(I)-1] #define SX(I) sx[(I)-1] if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i = 1; i <= *n; ++i) { stemp = *c * SX(ix) + *s * SY(iy); SY(iy) = *c * SY(iy) - *s * SX(ix); SX(ix) = stemp; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i = 1; i <= *n; ++i) { stemp = *c * SX(i) + *s * SY(i); SY(i) = *c * SY(i) - *s * SX(i); SX(i) = stemp; /* L30: */ } return 0; } /* srot_ */ SuperLU_DIST_5.3.0/CBLAS/dznrm2.c0000644013363400111340000000377013233431301015047 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) { /* System generated locals */ integer i__1, i__2, i__3; doublereal ret_val, d__1; /* Builtin functions */ double d_imag(doublecomplex *), sqrt(doublereal); /* Local variables */ static doublereal temp, norm, scale; static integer ix; static doublereal ssq; /* DZNRM2 returns the euclidean norm of a vector via the function name, so that DZNRM2 := sqrt( conjg( x' )*x ) -- This version written on 25-October-1982. Modified on 14-October-1993 to inline the call to ZLASSQ. Sven Hammarling, Nag Ltd. Parameter adjustments Function Body */ #define X(I) x[(I)-1] if (*n < 1 || *incx < 1) { norm = 0.; } else { scale = 0.; ssq = 1.; /* The following loop is equivalent to this call to the LAPACK auxiliary routine: CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) */ i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; *incx < 0 ? 
ix >= (*n-1)**incx+1 : ix <= (*n-1)**incx+1; ix += *incx) { i__3 = ix; if (X(ix).r != 0.) { i__3 = ix; temp = (d__1 = X(ix).r, abs(d__1)); if (scale < temp) { /* Computing 2nd power */ d__1 = scale / temp; ssq = ssq * (d__1 * d__1) + 1.; scale = temp; } else { /* Computing 2nd power */ d__1 = temp / scale; ssq += d__1 * d__1; } } if (d_imag(&X(ix)) != 0.) { temp = (d__1 = d_imag(&X(ix)), abs(d__1)); if (scale < temp) { /* Computing 2nd power */ d__1 = scale / temp; ssq = ssq * (d__1 * d__1) + 1.; scale = temp; } else { /* Computing 2nd power */ d__1 = temp / scale; ssq += d__1 * d__1; } } /* L10: */ } norm = scale * sqrt(ssq); } ret_val = norm; return ret_val; /* End of DZNRM2. */ } /* dznrm2_ */ SuperLU_DIST_5.3.0/CBLAS/caxpy.c0000644013363400111340000000357513233431301014762 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer * incx, complex *cy, integer *incy) { /* System generated locals */ integer i__1, i__2, i__3, i__4; real r__1, r__2; complex q__1, q__2; /* Builtin functions */ double r_imag(complex *); /* Local variables */ static integer i, ix, iy; /* constant times a vector plus a vector. jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define CY(I) cy[(I)-1] #define CX(I) cx[(I)-1] if (*n <= 0) { return 0; } if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = iy; i__3 = iy; i__4 = ix; q__2.r = ca->r * CX(ix).r - ca->i * CX(ix).i, q__2.i = ca->r * CX( ix).i + ca->i * CX(ix).r; q__1.r = CY(iy).r + q__2.r, q__1.i = CY(iy).i + q__2.i; CY(iy).r = q__1.r, CY(iy).i = q__1.i; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = i; i__3 = i; i__4 = i; q__2.r = ca->r * CX(i).r - ca->i * CX(i).i, q__2.i = ca->r * CX( i).i + ca->i * CX(i).r; q__1.r = CY(i).r + q__2.r, q__1.i = CY(i).i + q__2.i; CY(i).r = q__1.r, CY(i).i = q__1.i; /* L30: */ } return 0; } /* caxpy_ */ SuperLU_DIST_5.3.0/CBLAS/ccopy.c0000644013363400111340000000251713233431301014746 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex * cy, integer *incy) { /* System generated locals */ integer i__1, i__2, i__3; /* Local variables */ static integer i, ix, iy; /* copies a vector, x, to a vector, y. jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments Function Body */ #define CY(I) cy[(I)-1] #define CX(I) cx[(I)-1] if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = iy; i__3 = ix; CY(iy).r = CX(ix).r, CY(iy).i = CX(ix).i; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = i; i__3 = i; CY(i).r = CX(i).r, CY(i).i = CX(i).i; /* L30: */ } return 0; } /* ccopy_ */ SuperLU_DIST_5.3.0/CBLAS/cdotc.c0000644013363400111340000000374513233431301014731 0ustar xiaoyessg/* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer *incx, complex *cy, integer *incy) { /* System generated locals */ integer i__1, i__2; complex q__1, q__2, q__3; /* Builtin functions */ void r_cnjg(complex *, complex *); /* Local variables */ static integer i; static complex ctemp; static integer ix, iy; /* forms the dot product of two vectors, conjugating the first vector. jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) Parameter adjustments */ --cy; --cx; /* Function Body */ ctemp.r = 0.f, ctemp.i = 0.f; ret_val->r = 0.f, ret_val->i = 0.f; if (*n <= 0) { return ; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i = 1; i <= *n; ++i) { r_cnjg(&q__3, &cx[ix]); i__2 = iy; q__2.r = q__3.r * cy[iy].r - q__3.i * cy[iy].i, q__2.i = q__3.r * cy[iy].i + q__3.i * cy[iy].r; q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; ctemp.r = q__1.r, ctemp.i = q__1.i; ix += *incx; iy += *incy; /* L10: */ } ret_val->r = ctemp.r, ret_val->i = ctemp.i; return ; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i = 1; i <= *n; ++i) { r_cnjg(&q__3, &cx[i]); i__2 = i; q__2.r = q__3.r * cy[i].r - q__3.i * cy[i].i, q__2.i = q__3.r * cy[i].i + q__3.i * cy[i].r; q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; ctemp.r = q__1.r, ctemp.i = q__1.i; /* L30: */ } ret_val->r = ctemp.r, ret_val->i = ctemp.i; return ; } /* cdotc_ */ SuperLU_DIST_5.3.0/CBLAS/cgemv.c0000644013363400111340000002362113233431301014731 0ustar xiaoyessg /* -- translated by f2c (version 19940927). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex * alpha, complex *a, integer *lda, complex *x, integer *incx, complex * beta, complex *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; complex q__1, q__2, q__3; /* Builtin functions */ void r_cnjg(complex *, complex *); /* Local variables */ static integer info; static complex temp; static integer lenx, leny, i, j; static integer ix, iy, jx, jy, kx, ky; extern /* Subroutine */ int input_error_dist(char *, integer *); static logical noconj; /* Purpose ======= CGEMV performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or y := alpha*conjg( A' )*x + beta*y, where alpha and beta are scalars, x and y are vectors and A is an m by n matrix. Parameters ========== TRANS - CHARACTER*1. On entry, TRANS specifies the operation to be performed as follows: TRANS = 'N' or 'n' y := alpha*A*x + beta*y. TRANS = 'T' or 't' y := alpha*A'*x + beta*y. TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. Unchanged on exit. M - INTEGER. On entry, M specifies the number of rows of the matrix A. M must be at least zero. Unchanged on exit. N - INTEGER. On entry, N specifies the number of columns of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. A - COMPLEX array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, m ). Unchanged on exit. X - COMPLEX array of DIMENSION at least ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. 
Before entry, the incremented array X must contain the vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. BETA - COMPLEX . On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. Unchanged on exit. Y - COMPLEX array of DIMENSION at least ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry with BETA non-zero, the incremented array Y must contain the vector y. On exit, Y is overwritten by the updated vector y. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if ( strncmp(trans, "N", 1)!= 0 && strncmp(trans, "T", 1) != 0 && ! strncmp(trans, "C", 1) !=0 ) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*lda < max(1,*m)) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { input_error_dist("CGEMV ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f)) { return 0; } noconj = (strncmp(trans, "T", 1)==0); /* Set LENX and LENY, the lengths of the vectors x and y, and set up the start points in X and Y. */ if (strncmp(trans, "N", 1)==0) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Start the operations. 
In this version the elements of A are accessed sequentially with one pass through A. First form y := beta*y. */ if (beta->r != 1.f || beta->i != 0.f) { if (*incy == 1) { if (beta->r == 0.f && beta->i == 0.f) { i__1 = leny; for (i = 1; i <= leny; ++i) { i__2 = i; Y(i).r = 0.f, Y(i).i = 0.f; /* L10: */ } } else { i__1 = leny; for (i = 1; i <= leny; ++i) { i__2 = i; i__3 = i; q__1.r = beta->r * Y(i).r - beta->i * Y(i).i, q__1.i = beta->r * Y(i).i + beta->i * Y(i) .r; Y(i).r = q__1.r, Y(i).i = q__1.i; /* L20: */ } } } else { iy = ky; if (beta->r == 0.f && beta->i == 0.f) { i__1 = leny; for (i = 1; i <= leny; ++i) { i__2 = iy; Y(iy).r = 0.f, Y(iy).i = 0.f; iy += *incy; /* L30: */ } } else { i__1 = leny; for (i = 1; i <= leny; ++i) { i__2 = iy; i__3 = iy; q__1.r = beta->r * Y(iy).r - beta->i * Y(iy).i, q__1.i = beta->r * Y(iy).i + beta->i * Y(iy) .r; Y(iy).r = q__1.r, Y(iy).i = q__1.i; iy += *incy; /* L40: */ } } } } if (alpha->r == 0.f && alpha->i == 0.f) { return 0; } if (strncmp(trans, "N", 1)==0) { /* Form y := alpha*A*x + y. 
*/ jx = kx; if (*incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jx; if (X(jx).r != 0.f || X(jx).i != 0.f) { i__2 = jx; q__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, q__1.i = alpha->r * X(jx).i + alpha->i * X(jx) .r; temp.r = q__1.r, temp.i = q__1.i; i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i; i__4 = i; i__5 = i + j * a_dim1; q__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, q__2.i = temp.r * A(i,j).i + temp.i * A(i,j) .r; q__1.r = Y(i).r + q__2.r, q__1.i = Y(i).i + q__2.i; Y(i).r = q__1.r, Y(i).i = q__1.i; /* L50: */ } } jx += *incx; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jx; if (X(jx).r != 0.f || X(jx).i != 0.f) { i__2 = jx; q__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, q__1.i = alpha->r * X(jx).i + alpha->i * X(jx) .r; temp.r = q__1.r, temp.i = q__1.i; iy = ky; i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = iy; i__4 = iy; i__5 = i + j * a_dim1; q__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, q__2.i = temp.r * A(i,j).i + temp.i * A(i,j) .r; q__1.r = Y(iy).r + q__2.r, q__1.i = Y(iy).i + q__2.i; Y(iy).r = q__1.r, Y(iy).i = q__1.i; iy += *incy; /* L70: */ } } jx += *incx; /* L80: */ } } } else { /* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. 
*/ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { temp.r = 0.f, temp.i = 0.f; if (noconj) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * a_dim1; i__4 = i; q__2.r = A(i,j).r * X(i).r - A(i,j).i * X(i) .i, q__2.i = A(i,j).r * X(i).i + A(i,j) .i * X(i).r; q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; temp.r = q__1.r, temp.i = q__1.i; /* L90: */ } } else { i__2 = *m; for (i = 1; i <= *m; ++i) { r_cnjg(&q__3, &A(i,j)); i__3 = i; q__2.r = q__3.r * X(i).r - q__3.i * X(i).i, q__2.i = q__3.r * X(i).i + q__3.i * X(i) .r; q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; temp.r = q__1.r, temp.i = q__1.i; /* L100: */ } } i__2 = jy; i__3 = jy; q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = alpha->r * temp.i + alpha->i * temp.r; q__1.r = Y(jy).r + q__2.r, q__1.i = Y(jy).i + q__2.i; Y(jy).r = q__1.r, Y(jy).i = q__1.i; jy += *incy; /* L110: */ } } else { i__1 = *n; for (j = 1; j <= *n; ++j) { temp.r = 0.f, temp.i = 0.f; ix = kx; if (noconj) { i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * a_dim1; i__4 = ix; q__2.r = A(i,j).r * X(ix).r - A(i,j).i * X(ix) .i, q__2.i = A(i,j).r * X(ix).i + A(i,j) .i * X(ix).r; q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; temp.r = q__1.r, temp.i = q__1.i; ix += *incx; /* L120: */ } } else { i__2 = *m; for (i = 1; i <= *m; ++i) { r_cnjg(&q__3, &A(i,j)); i__3 = ix; q__2.r = q__3.r * X(ix).r - q__3.i * X(ix).i, q__2.i = q__3.r * X(ix).i + q__3.i * X(ix) .r; q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; temp.r = q__1.r, temp.i = q__1.i; ix += *incx; /* L130: */ } } i__2 = jy; i__3 = jy; q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = alpha->r * temp.i + alpha->i * temp.r; q__1.r = Y(jy).r + q__2.r, q__1.i = Y(jy).i + q__2.i; Y(jy).r = q__1.r, Y(jy).i = q__1.i; jy += *incy; /* L140: */ } } } return 0; /* End of CGEMV . */ } /* cgemv_ */ SuperLU_DIST_5.3.0/CBLAS/cgerc.c0000644013363400111340000001234513233431301014714 0ustar xiaoyessg /* -- translated by f2c (version 19940927). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; complex q__1, q__2; /* Builtin functions */ void r_cnjg(complex *, complex *); /* Local variables */ static integer info; static complex temp; static integer i, j, ix, jy, kx; extern /* Subroutine */ int input_error_dist(char *, integer *); /* Purpose ======= CGERC performs the rank 1 operation A := alpha*x*conjg( y' ) + A, where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. Parameters ========== M - INTEGER. On entry, M specifies the number of rows of the matrix A. M must be at least zero. Unchanged on exit. N - INTEGER. On entry, N specifies the number of columns of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. X - COMPLEX array of dimension at least ( 1 + ( m - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the m element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Y - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. Unchanged on exit. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. A - COMPLEX array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. 
LDA must be at least max( 1, m ). Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (*m < 0) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*m)) { info = 9; } if (info != 0) { input_error_dist("CGERC ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { return 0; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. */ if (*incy > 0) { jy = 1; } else { jy = 1 - (*n - 1) * *incy; } if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jy; if (Y(jy).r != 0.f || Y(jy).i != 0.f) { r_cnjg(&q__2, &Y(jy)); q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = alpha->r * q__2.i + alpha->i * q__2.r; temp.r = q__1.r, temp.i = q__1.i; i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * a_dim1; i__4 = i + j * a_dim1; i__5 = i; q__2.r = X(i).r * temp.r - X(i).i * temp.i, q__2.i = X(i).r * temp.i + X(i).i * temp.r; q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + q__2.i; A(i,j).r = q__1.r, A(i,j).i = q__1.i; /* L10: */ } } jy += *incy; /* L20: */ } } else { if (*incx > 0) { kx = 1; } else { kx = 1 - (*m - 1) * *incx; } i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jy; if (Y(jy).r != 0.f || Y(jy).i != 0.f) { r_cnjg(&q__2, &Y(jy)); q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = alpha->r * q__2.i + alpha->i * q__2.r; temp.r = q__1.r, temp.i = q__1.i; ix = kx; i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * a_dim1; i__4 = i + j * a_dim1; i__5 = ix; q__2.r = X(ix).r * temp.r - X(ix).i * temp.i, q__2.i = X(ix).r * 
temp.i + X(ix).i * temp.r; q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + q__2.i; A(i,j).r = q__1.r, A(i,j).i = q__1.i; ix += *incx; /* L30: */ } } jy += *incy; /* L40: */ } } return 0; /* End of CGERC . */ } /* cgerc_ */ SuperLU_DIST_5.3.0/CBLAS/cgeru.c0000644013363400111340000001221013233431301014725 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include "f2c.h" /* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; complex q__1, q__2; /* Local variables */ static integer info; static complex temp; static integer i, j, ix, jy, kx; extern /* Subroutine */ int input_error_dist(char *, integer *); /* Purpose ======= CGERU performs the rank 1 operation A := alpha*x*y' + A, where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. Parameters ========== M - INTEGER. On entry, M specifies the number of rows of the matrix A. M must be at least zero. Unchanged on exit. N - INTEGER. On entry, N specifies the number of columns of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. X - COMPLEX array of dimension at least ( 1 + ( m - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the m element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Y - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. Unchanged on exit. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. 
A - COMPLEX array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, m ). Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (*m < 0) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*m)) { info = 9; } if (info != 0) { input_error_dist("CGERU ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { return 0; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. 
*/ if (*incy > 0) { jy = 1; } else { jy = 1 - (*n - 1) * *incy; } if (*incx == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jy; if (Y(jy).r != 0.f || Y(jy).i != 0.f) { i__2 = jy; q__1.r = alpha->r * Y(jy).r - alpha->i * Y(jy).i, q__1.i = alpha->r * Y(jy).i + alpha->i * Y(jy).r; temp.r = q__1.r, temp.i = q__1.i; i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * a_dim1; i__4 = i + j * a_dim1; i__5 = i; q__2.r = X(i).r * temp.r - X(i).i * temp.i, q__2.i = X(i).r * temp.i + X(i).i * temp.r; q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + q__2.i; A(i,j).r = q__1.r, A(i,j).i = q__1.i; /* L10: */ } } jy += *incy; /* L20: */ } } else { if (*incx > 0) { kx = 1; } else { kx = 1 - (*m - 1) * *incx; } i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jy; if (Y(jy).r != 0.f || Y(jy).i != 0.f) { i__2 = jy; q__1.r = alpha->r * Y(jy).r - alpha->i * Y(jy).i, q__1.i = alpha->r * Y(jy).i + alpha->i * Y(jy).r; temp.r = q__1.r, temp.i = q__1.i; ix = kx; i__2 = *m; for (i = 1; i <= *m; ++i) { i__3 = i + j * a_dim1; i__4 = i + j * a_dim1; i__5 = ix; q__2.r = X(ix).r * temp.r - X(ix).i * temp.i, q__2.i = X(ix).r * temp.i + X(ix).i * temp.r; q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + q__2.i; A(i,j).r = q__1.r, A(i,j).i = q__1.i; ix += *incx; /* L30: */ } } jy += *incy; /* L40: */ } } return 0; /* End of CGERU . */ } /* cgeru_ */ SuperLU_DIST_5.3.0/CBLAS/cher2.c0000644013363400111340000003057613233431301014642 0ustar xiaoyessg /* -- translated by f2c (version 19940927). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex * x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1; complex q__1, q__2, q__3, q__4; /* Builtin functions */ void r_cnjg(complex *, complex *); /* Local variables */ static integer info; static complex temp1, temp2; static integer i, j; static integer ix, iy, jx, jy, kx, ky; extern /* Subroutine */ int input_error_dist(char *, integer *); /* Purpose ======= CHER2 performs the hermitian rank 2 operation A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, where alpha is a scalar, x and y are n element vectors and A is an n by n hermitian matrix. Parameters ========== UPLO - CHARACTER*1. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: UPLO = 'U' or 'u' Only the upper triangular part of A is to be referenced. UPLO = 'L' or 'l' Only the lower triangular part of A is to be referenced. Unchanged on exit. N - INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. X - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Y - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. Unchanged on exit. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. 
A - COMPLEX array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular part of the hermitian matrix and the strictly lower triangular part of A is not referenced. On exit, the upper triangular part of the array A is overwritten by the upper triangular part of the updated matrix. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower triangular part of the hermitian matrix and the strictly upper triangular part of A is not referenced. On exit, the lower triangular part of the array A is overwritten by the lower triangular part of the updated matrix. Note that the imaginary parts of the diagonal elements need not be set, they are assumed to be zero, and on exit they are set to zero. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, n ). Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if (strncmp(uplo, "U", 1)!=0 && strncmp(uplo, "L", 1) != 0) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*n)) { info = 9; } if (info != 0) { input_error_dist("CHER2 ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) { return 0; } /* Set up the start points in X and Y if the increments are not both unity. 
*/ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through the triangular part of A. */ if (strncmp(uplo, "U", 1)==0) { /* Form A when A is stored in the upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = j; i__3 = j; if (X(j).r != 0.f || X(j).i != 0.f || (Y(j).r != 0.f || Y(j).i != 0.f)) { r_cnjg(&q__2, &Y(j)); q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = alpha->r * q__2.i + alpha->i * q__2.r; temp1.r = q__1.r, temp1.i = q__1.i; i__2 = j; q__2.r = alpha->r * X(j).r - alpha->i * X(j).i, q__2.i = alpha->r * X(j).i + alpha->i * X(j) .r; r_cnjg(&q__1, &q__2); temp2.r = q__1.r, temp2.i = q__1.i; i__2 = j - 1; for (i = 1; i <= j-1; ++i) { i__3 = i + j * a_dim1; i__4 = i + j * a_dim1; i__5 = i; q__3.r = X(i).r * temp1.r - X(i).i * temp1.i, q__3.i = X(i).r * temp1.i + X(i).i * temp1.r; q__2.r = A(i,j).r + q__3.r, q__2.i = A(i,j).i + q__3.i; i__6 = i; q__4.r = Y(i).r * temp2.r - Y(i).i * temp2.i, q__4.i = Y(i).r * temp2.i + Y(i).i * temp2.r; q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; A(i,j).r = q__1.r, A(i,j).i = q__1.i; /* L10: */ } i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; i__4 = j; q__2.r = X(j).r * temp1.r - X(j).i * temp1.i, q__2.i = X(j).r * temp1.i + X(j).i * temp1.r; i__5 = j; q__3.r = Y(j).r * temp2.r - Y(j).i * temp2.i, q__3.i = Y(j).r * temp2.i + Y(j).i * temp2.r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; d__1 = A(j,j).r + q__1.r; A(j,j).r = d__1, A(j,j).i = 0.f; } else { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; d__1 = A(j,j).r; A(j,j).r = d__1, A(j,j).i = 0.f; } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jx; i__3 = jy; if (X(jx).r != 0.f || X(jx).i != 0.f || (Y(jy).r != 0.f || Y(jy).i != 0.f)) { r_cnjg(&q__2, &Y(jy)); q__1.r = 
alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = alpha->r * q__2.i + alpha->i * q__2.r; temp1.r = q__1.r, temp1.i = q__1.i; i__2 = jx; q__2.r = alpha->r * X(jx).r - alpha->i * X(jx).i, q__2.i = alpha->r * X(jx).i + alpha->i * X(jx) .r; r_cnjg(&q__1, &q__2); temp2.r = q__1.r, temp2.i = q__1.i; ix = kx; iy = ky; i__2 = j - 1; for (i = 1; i <= j-1; ++i) { i__3 = i + j * a_dim1; i__4 = i + j * a_dim1; i__5 = ix; q__3.r = X(ix).r * temp1.r - X(ix).i * temp1.i, q__3.i = X(ix).r * temp1.i + X(ix).i * temp1.r; q__2.r = A(i,j).r + q__3.r, q__2.i = A(i,j).i + q__3.i; i__6 = iy; q__4.r = Y(iy).r * temp2.r - Y(iy).i * temp2.i, q__4.i = Y(iy).r * temp2.i + Y(iy).i * temp2.r; q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; A(i,j).r = q__1.r, A(i,j).i = q__1.i; ix += *incx; iy += *incy; /* L30: */ } i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; i__4 = jx; q__2.r = X(jx).r * temp1.r - X(jx).i * temp1.i, q__2.i = X(jx).r * temp1.i + X(jx).i * temp1.r; i__5 = jy; q__3.r = Y(jy).r * temp2.r - Y(jy).i * temp2.i, q__3.i = Y(jy).r * temp2.i + Y(jy).i * temp2.r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; d__1 = A(j,j).r + q__1.r; A(j,j).r = d__1, A(j,j).i = 0.f; } else { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; d__1 = A(j,j).r; A(j,j).r = d__1, A(j,j).i = 0.f; } jx += *incx; jy += *incy; /* L40: */ } } } else { /* Form A when A is stored in the lower triangle. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = j; i__3 = j; if (X(j).r != 0.f || X(j).i != 0.f || (Y(j).r != 0.f || Y(j).i != 0.f)) { r_cnjg(&q__2, &Y(j)); q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = alpha->r * q__2.i + alpha->i * q__2.r; temp1.r = q__1.r, temp1.i = q__1.i; i__2 = j; q__2.r = alpha->r * X(j).r - alpha->i * X(j).i, q__2.i = alpha->r * X(j).i + alpha->i * X(j) .r; r_cnjg(&q__1, &q__2); temp2.r = q__1.r, temp2.i = q__1.i; i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; i__4 = j; q__2.r = X(j).r * temp1.r - X(j).i * temp1.i, q__2.i = X(j).r * temp1.i + X(j).i * temp1.r; i__5 = j; q__3.r = Y(j).r * temp2.r - Y(j).i * temp2.i, q__3.i = Y(j).r * temp2.i + Y(j).i * temp2.r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; d__1 = A(j,j).r + q__1.r; A(j,j).r = d__1, A(j,j).i = 0.f; i__2 = *n; for (i = j + 1; i <= *n; ++i) { i__3 = i + j * a_dim1; i__4 = i + j * a_dim1; i__5 = i; q__3.r = X(i).r * temp1.r - X(i).i * temp1.i, q__3.i = X(i).r * temp1.i + X(i).i * temp1.r; q__2.r = A(i,j).r + q__3.r, q__2.i = A(i,j).i + q__3.i; i__6 = i; q__4.r = Y(i).r * temp2.r - Y(i).i * temp2.i, q__4.i = Y(i).r * temp2.i + Y(i).i * temp2.r; q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; A(i,j).r = q__1.r, A(i,j).i = q__1.i; /* L50: */ } } else { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; d__1 = A(j,j).r; A(j,j).r = d__1, A(j,j).i = 0.f; } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jx; i__3 = jy; if (X(jx).r != 0.f || X(jx).i != 0.f || (Y(jy).r != 0.f || Y(jy).i != 0.f)) { r_cnjg(&q__2, &Y(jy)); q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = alpha->r * q__2.i + alpha->i * q__2.r; temp1.r = q__1.r, temp1.i = q__1.i; i__2 = jx; q__2.r = alpha->r * X(jx).r - alpha->i * X(jx).i, q__2.i = alpha->r * X(jx).i + alpha->i * X(jx) .r; r_cnjg(&q__1, &q__2); temp2.r = q__1.r, temp2.i = q__1.i; i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; i__4 = jx; q__2.r = X(jx).r * temp1.r - X(jx).i * temp1.i, 
q__2.i = X(jx).r * temp1.i + X(jx).i * temp1.r; i__5 = jy; q__3.r = Y(jy).r * temp2.r - Y(jy).i * temp2.i, q__3.i = Y(jy).r * temp2.i + Y(jy).i * temp2.r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; d__1 = A(j,j).r + q__1.r; A(j,j).r = d__1, A(j,j).i = 0.f; ix = jx; iy = jy; i__2 = *n; for (i = j + 1; i <= *n; ++i) { ix += *incx; iy += *incy; i__3 = i + j * a_dim1; i__4 = i + j * a_dim1; i__5 = ix; q__3.r = X(ix).r * temp1.r - X(ix).i * temp1.i, q__3.i = X(ix).r * temp1.i + X(ix).i * temp1.r; q__2.r = A(i,j).r + q__3.r, q__2.i = A(i,j).i + q__3.i; i__6 = iy; q__4.r = Y(iy).r * temp2.r - Y(iy).i * temp2.i, q__4.i = Y(iy).r * temp2.i + Y(iy).i * temp2.r; q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; A(i,j).r = q__1.r, A(i,j).i = q__1.i; /* L70: */ } } else { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; d__1 = A(j,j).r; A(j,j).r = d__1, A(j,j).i = 0.f; } jx += *incx; jy += *incy; /* L80: */ } } } return 0; /* End of CHER2 . */ } /* cher2_ */ SuperLU_DIST_5.3.0/CBLAS/chemv.c0000644013363400111340000002730113233431301014731 0ustar xiaoyessg /* -- translated by f2c (version 19940927). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #include #include "f2c.h" /* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex * a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, integer *incy) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1; complex q__1, q__2, q__3, q__4; /* Builtin functions */ void r_cnjg(complex *, complex *); /* Local variables */ static integer info; static complex temp1, temp2; static integer i, j; static integer ix, iy, jx, jy, kx, ky; extern /* Subroutine */ int input_error_dist(char *, integer *); /* Purpose ======= CHEMV performs the matrix-vector operation y := alpha*A*x + beta*y, where alpha and beta are scalars, x and y are n element vectors and A is an n by n hermitian matrix. 
Parameters ========== UPLO - CHARACTER*1. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: UPLO = 'U' or 'u' Only the upper triangular part of A is to be referenced. UPLO = 'L' or 'l' Only the lower triangular part of A is to be referenced. Unchanged on exit. N - INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. A - COMPLEX array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular part of the hermitian matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower triangular part of the hermitian matrix and the strictly upper triangular part of A is not referenced. Note that the imaginary parts of the diagonal elements need not be set and are assumed to be zero. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, n ). Unchanged on exit. X - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. BETA - COMPLEX . On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. Unchanged on exit. Y - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. On exit, Y is overwritten by the updated vector y. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. 
Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments Function Body */ #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] info = 0; if ( strncmp(uplo, "U", 1)!=0 && strncmp(uplo, "L", 1) !=0 ) { info = 1; } else if (*n < 0) { info = 2; } else if (*lda < max(1,*n)) { info = 5; } else if (*incx == 0) { info = 7; } else if (*incy == 0) { info = 10; } if (info != 0) { input_error_dist("CHEMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f)) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through the triangular part of A. First form y := beta*y. 
*/ if (beta->r != 1.f || beta->i != 0.f) { if (*incy == 1) { if (beta->r == 0.f && beta->i == 0.f) { i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = i; Y(i).r = 0.f, Y(i).i = 0.f; /* L10: */ } } else { i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = i; i__3 = i; q__1.r = beta->r * Y(i).r - beta->i * Y(i).i, q__1.i = beta->r * Y(i).i + beta->i * Y(i) .r; Y(i).r = q__1.r, Y(i).i = q__1.i; /* L20: */ } } } else { iy = ky; if (beta->r == 0.f && beta->i == 0.f) { i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = iy; Y(iy).r = 0.f, Y(iy).i = 0.f; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = iy; i__3 = iy; q__1.r = beta->r * Y(iy).r - beta->i * Y(iy).i, q__1.i = beta->r * Y(iy).i + beta->i * Y(iy) .r; Y(iy).r = q__1.r, Y(iy).i = q__1.i; iy += *incy; /* L40: */ } } } } if (alpha->r == 0.f && alpha->i == 0.f) { return 0; } if (strncmp(uplo, "U", 1)==0) { /* Form y when A is stored in upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = j; q__1.r = alpha->r * X(j).r - alpha->i * X(j).i, q__1.i = alpha->r * X(j).i + alpha->i * X(j).r; temp1.r = q__1.r, temp1.i = q__1.i; temp2.r = 0.f, temp2.i = 0.f; i__2 = j - 1; for (i = 1; i <= j-1; ++i) { i__3 = i; i__4 = i; i__5 = i + j * a_dim1; q__2.r = temp1.r * A(i,j).r - temp1.i * A(i,j).i, q__2.i = temp1.r * A(i,j).i + temp1.i * A(i,j) .r; q__1.r = Y(i).r + q__2.r, q__1.i = Y(i).i + q__2.i; Y(i).r = q__1.r, Y(i).i = q__1.i; r_cnjg(&q__3, &A(i,j)); i__3 = i; q__2.r = q__3.r * X(i).r - q__3.i * X(i).i, q__2.i = q__3.r * X(i).i + q__3.i * X(i).r; q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; temp2.r = q__1.r, temp2.i = q__1.i; /* L50: */ } i__2 = j; i__3 = j; i__4 = j + j * a_dim1; d__1 = A(j,j).r; q__3.r = d__1 * temp1.r, q__3.i = d__1 * temp1.i; q__2.r = Y(j).r + q__3.r, q__2.i = Y(j).i + q__3.i; q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; Y(j).r = 
q__1.r, Y(j).i = q__1.i; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jx; q__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, q__1.i = alpha->r * X(jx).i + alpha->i * X(jx).r; temp1.r = q__1.r, temp1.i = q__1.i; temp2.r = 0.f, temp2.i = 0.f; ix = kx; iy = ky; i__2 = j - 1; for (i = 1; i <= j-1; ++i) { i__3 = iy; i__4 = iy; i__5 = i + j * a_dim1; q__2.r = temp1.r * A(i,j).r - temp1.i * A(i,j).i, q__2.i = temp1.r * A(i,j).i + temp1.i * A(i,j) .r; q__1.r = Y(iy).r + q__2.r, q__1.i = Y(iy).i + q__2.i; Y(iy).r = q__1.r, Y(iy).i = q__1.i; r_cnjg(&q__3, &A(i,j)); i__3 = ix; q__2.r = q__3.r * X(ix).r - q__3.i * X(ix).i, q__2.i = q__3.r * X(ix).i + q__3.i * X(ix).r; q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; temp2.r = q__1.r, temp2.i = q__1.i; ix += *incx; iy += *incy; /* L70: */ } i__2 = jy; i__3 = jy; i__4 = j + j * a_dim1; d__1 = A(j,j).r; q__3.r = d__1 * temp1.r, q__3.i = d__1 * temp1.i; q__2.r = Y(jy).r + q__3.r, q__2.i = Y(jy).i + q__3.i; q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; Y(jy).r = q__1.r, Y(jy).i = q__1.i; jx += *incx; jy += *incy; /* L80: */ } } } else { /* Form y when A is stored in lower triangle. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = j; q__1.r = alpha->r * X(j).r - alpha->i * X(j).i, q__1.i = alpha->r * X(j).i + alpha->i * X(j).r; temp1.r = q__1.r, temp1.i = q__1.i; temp2.r = 0.f, temp2.i = 0.f; i__2 = j; i__3 = j; i__4 = j + j * a_dim1; d__1 = A(j,j).r; q__2.r = d__1 * temp1.r, q__2.i = d__1 * temp1.i; q__1.r = Y(j).r + q__2.r, q__1.i = Y(j).i + q__2.i; Y(j).r = q__1.r, Y(j).i = q__1.i; i__2 = *n; for (i = j + 1; i <= *n; ++i) { i__3 = i; i__4 = i; i__5 = i + j * a_dim1; q__2.r = temp1.r * A(i,j).r - temp1.i * A(i,j).i, q__2.i = temp1.r * A(i,j).i + temp1.i * A(i,j) .r; q__1.r = Y(i).r + q__2.r, q__1.i = Y(i).i + q__2.i; Y(i).r = q__1.r, Y(i).i = q__1.i; r_cnjg(&q__3, &A(i,j)); i__3 = i; q__2.r = q__3.r * X(i).r - q__3.i * X(i).i, q__2.i = q__3.r * X(i).i + q__3.i * X(i).r; q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; temp2.r = q__1.r, temp2.i = q__1.i; /* L90: */ } i__2 = j; i__3 = j; q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; q__1.r = Y(j).r + q__2.r, q__1.i = Y(j).i + q__2.i; Y(j).r = q__1.r, Y(j).i = q__1.i; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = jx; q__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, q__1.i = alpha->r * X(jx).i + alpha->i * X(jx).r; temp1.r = q__1.r, temp1.i = q__1.i; temp2.r = 0.f, temp2.i = 0.f; i__2 = jy; i__3 = jy; i__4 = j + j * a_dim1; d__1 = A(j,j).r; q__2.r = d__1 * temp1.r, q__2.i = d__1 * temp1.i; q__1.r = Y(jy).r + q__2.r, q__1.i = Y(jy).i + q__2.i; Y(jy).r = q__1.r, Y(jy).i = q__1.i; ix = jx; iy = jy; i__2 = *n; for (i = j + 1; i <= *n; ++i) { ix += *incx; iy += *incy; i__3 = iy; i__4 = iy; i__5 = i + j * a_dim1; q__2.r = temp1.r * A(i,j).r - temp1.i * A(i,j).i, q__2.i = temp1.r * A(i,j).i + temp1.i * A(i,j) .r; q__1.r = Y(iy).r + q__2.r, q__1.i = Y(iy).i + q__2.i; Y(iy).r = q__1.r, Y(iy).i = q__1.i; r_cnjg(&q__3, &A(i,j)); i__3 = ix; q__2.r = q__3.r * X(ix).r - q__3.i * 
X(ix).i, q__2.i = q__3.r * X(ix).i + q__3.i * X(ix).r; q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; temp2.r = q__1.r, temp2.i = q__1.i; /* L110: */ } i__2 = jy; i__3 = jy; q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; q__1.r = Y(jy).r + q__2.r, q__1.i = Y(jy).i + q__2.i; Y(jy).r = q__1.r, Y(jy).i = q__1.i; jx += *incx; jy += *incy; /* L120: */ } } } return 0; /* End of CHEMV . */ } /* chemv_ */ SuperLU_DIST_5.3.0/cmake/0000755013363400111340000000000013233431301013714 5ustar xiaoyessgSuperLU_DIST_5.3.0/cmake/FindParMETIS.cmake0000644013363400111340000000402513233431301017044 0ustar xiaoyessg# - Try to find ParMETIS # Once done this will define # # PARMETIS_FOUND - system has ParMETIS # PARMETIS_INCLUDE_DIRS - include directories for ParMETIS # PARMETIS_LIBRARIES - libraries for ParMETIS # # and the imported target # # ParMETIS::ParMETIS find_path(ParMETIS_INCLUDE_DIR parmetis.h DOC "Directory where the ParMETIS header files are located" ) mark_as_advanced(ParMETIS_INCLUDE_DIR) set(ParMETIS_INCLUDE_DIRS "${ParMETIS_INCLUDE_DIR}") find_library(ParMETIS_LIBRARY NAMES parmetis DOC "Directory where the ParMETIS library is located" ) mark_as_advanced(ParMETIS_LIBRARY) set(ParMETIS_LIBRARIES "${ParMETIS_LIBRARY}") # Get ParMETIS version if(NOT PARMETIS_VERSION_STRING AND PARMETIS_INCLUDE_DIR AND EXISTS "${PARMETIS_INCLUDE_DIR}/parmetis.h") set(version_pattern "^#define[\t ]+PARMETIS_(MAJOR|MINOR)_VERSION[\t ]+([0-9\\.]+)$") file(STRINGS "${PARMETIS_INCLUDE_DIR}/parmetis.h" parmetis_version REGEX ${version_pattern}) foreach(match ${parmetis_version}) if(PARMETIS_VERSION_STRING) set(PARMETIS_VERSION_STRING "${PARMETIS_VERSION_STRING}.") endif() string(REGEX REPLACE ${version_pattern} "${PARMETIS_VERSION_STRING}\\2" PARMETIS_VERSION_STRING ${match}) set(PARMETIS_VERSION_${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) endforeach() unset(parmetis_version) unset(version_pattern) endif() # Standard package handling 
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ParMETIS
  REQUIRED_VARS ParMETIS_LIBRARY ParMETIS_INCLUDE_DIR
  VERSION_VAR PARMETIS_VERSION_STRING
  )

# Dependencies
include(CMakeFindDependencyMacro)
#find_dependency(MPI)
find_dependency(METIS)

# Expose the result as an imported target that carries its include
# directory and its link dependency on METIS.
if(ParMETIS_FOUND)
  if(NOT TARGET ParMETIS::ParMETIS)
    add_library(ParMETIS::ParMETIS UNKNOWN IMPORTED)
  endif()
  set_property(TARGET ParMETIS::ParMETIS PROPERTY IMPORTED_LOCATION "${ParMETIS_LIBRARY}")
  set_property(TARGET ParMETIS::ParMETIS PROPERTY INTERFACE_LINK_LIBRARIES METIS::METIS)
  set_property(TARGET ParMETIS::ParMETIS PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${ParMETIS_INCLUDE_DIRS}")
endif()
SuperLU_DIST_5.3.0/cmake/FindMETIS.cmake0000644013363400111340000000327613233431301016410 0ustar xiaoyessg# - Try to find METIS
# Once done this will define
#
#  METIS_FOUND        - system has METIS
#  METIS_INCLUDE_DIRS - include directories for METIS
#  METIS_LIBRARIES    - libraries for METIS
#
# and the imported target
#
#  METIS::METIS

find_path(METIS_INCLUDE_DIR metis.h
  DOC "Directory where the METIS header files are located"
)
mark_as_advanced(METIS_INCLUDE_DIR)
set(METIS_INCLUDE_DIRS "${METIS_INCLUDE_DIR}")

find_library(METIS_LIBRARY
  NAMES metis
  DOC "Directory where the METIS library is located"
)
mark_as_advanced(METIS_LIBRARY)
set(METIS_LIBRARIES "${METIS_LIBRARY}")

# Get METIS version
# NOTE(review): METIS 5 spells its version macros METIS_VER_MAJOR and
# METIS_VER_MINOR; the previous pattern looked for METIS_MAJOR_VERSION /
# METIS_MINOR_VERSION (the ParMETIS spelling). Confirm against the metis.h
# versions you support.
if(NOT METIS_VERSION_STRING AND METIS_INCLUDE_DIR AND EXISTS "${METIS_INCLUDE_DIR}/metis.h")
  set(version_pattern "^#define[\t ]+METIS_VER_(MAJOR|MINOR)[\t ]+([0-9\\.]+)$")
  file(STRINGS "${METIS_INCLUDE_DIR}/metis.h" metis_version REGEX ${version_pattern})

  foreach(match ${metis_version})
    if(METIS_VERSION_STRING)
      set(METIS_VERSION_STRING "${METIS_VERSION_STRING}.")
    endif()
    string(REGEX REPLACE ${version_pattern} "${METIS_VERSION_STRING}\\2" METIS_VERSION_STRING ${match})
    set(METIS_VERSION_${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
  endforeach()
  unset(metis_version)
  unset(version_pattern)
endif()

# Standard package handling
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(METIS
  REQUIRED_VARS METIS_LIBRARY METIS_INCLUDE_DIR
  VERSION_VAR METIS_VERSION_STRING
  )

# Publish the library as the imported target METIS::METIS, creating the
# target only if an earlier find has not already done so.
if(METIS_FOUND)
  if(NOT TARGET METIS::METIS)
    add_library(METIS::METIS UNKNOWN IMPORTED)
  endif()
  set_target_properties(METIS::METIS PROPERTIES
    IMPORTED_LOCATION "${METIS_LIBRARY}"
    INTERFACE_INCLUDE_DIRECTORIES "${METIS_INCLUDE_DIRS}")
endif()
SuperLU_DIST_5.3.0/cmake/XSDKDefaults.cmake0000644013363400111340000001377413233431301017173 0ustar xiaoyessg##################################################################################
#
#                    Set defaults for XSDK CMake projects
#
##################################################################################

#
# This module implements standard behavior for XSDK CMake projects.  The main
# thing it does in XSDK mode (i.e. USE_XSDK_DEFAULTS=TRUE) is to print out
# when the env vars CC, CXX, FC and compiler flags CFLAGS, CXXFLAGS, and
# FFLAGS/FCFLAGS are used to select the compilers and compiler flags (raw
# CMake does this silently) and to set BUILD_SHARED_LIBS=TRUE and
# CMAKE_BUILD_TYPE=DEBUG by default.  It does not implement *all* of the
# standard XSDK configuration parameters.  The parent CMake project must do
# that.
#
# Note that when USE_XSDK_DEFAULTS=TRUE, then the Fortran flags will be read
# from either of the env vars FFLAGS or FCFLAGS.  If both are set, but are the
# same, then FFLAGS is used (which is the same as FCFLAGS).  However, if both
# are set but are not equal, then a FATAL_ERROR is raised and CMake configure
# processing is stopped.
#
# To be used in a parent project, this module must be included after
#
#   PROJECT(${PROJECT_NAME}  NONE)
#
# is called but before the compilers are defined and processed using:
#
#   ENABLE_LANGUAGE(<LANG>)
#
# For example, one would do:
#
#   PROJECT(${PROJECT_NAME}  NONE)
#   ...
#   SET(USE_XSDK_DEFAULTS_DEFAULT TRUE) # Set to false if desired
#   INCLUDE("${CMAKE_CURRENT_SOURCE_DIR}/stdk/XSDKDefaults.cmake")
#   ...
#   ENABLE_LANGUAGE(C)
#   ENABLE_LANGUAGE(CXX)
#   ENABLE_LANGUAGE(Fortran)
#
# The variable `USE_XSDK_DEFAULTS_DEFAULT` is used as the default for the
# cache var `USE_XSDK_DEFAULTS`.  That way, a project can decide if it wants
# XSDK defaults turned on or off by default, and users can independently decide
# if they want the CMake project to use standard XSDK behavior or raw CMake
# behavior.
#
# By default, the XSDKDefaults.cmake module assumes that the project will need
# C, C++, and Fortran.  If any language is not needed, then set
# XSDK_ENABLE_C=OFF, XSDK_ENABLE_CXX=OFF, or XSDK_ENABLE_Fortran=OFF *before*
# including this module.  Note that these variables are *not* cache vars: a
# project either does or does not have C, C++ or Fortran source files, and the
# user has nothing to do with this, so there is no need for cache vars.  The
# parent CMake project just needs to tell XSDKDefaults.cmake which languages it
# needs or does not need.
#
# For example, if the parent CMake project only needs C, then it would do:
#
#   PROJECT(${PROJECT_NAME}  NONE)
#   ...
#   SET(USE_XSDK_DEFAULTS_DEFAULT TRUE)
#   SET(XSDK_ENABLE_CXX OFF)
#   SET(XSDK_ENABLE_Fortran OFF)
#   INCLUDE("${CMAKE_CURRENT_SOURCE_DIR}/stdk/XSDKDefaults.cmake")
#   ...
#   ENABLE_LANGUAGE(C)
#
# This module code will announce when it sets any variables.
#

#
# Helper functions
#

# print_var(<name>): print "<name> = '<value>'" for the named variable.
# Defined only if the including project has not already provided one.
if (NOT COMMAND PRINT_VAR)
  function(PRINT_VAR VAR_NAME)
    message("${VAR_NAME} = '${${VAR_NAME}}'")
  endfunction()
endif()

# set_default(<name> <value>...): assign the value only when the variable
# is currently empty.
if (NOT COMMAND SET_DEFAULT)
  macro(SET_DEFAULT VAR)
    if ("${${VAR}}" STREQUAL "")
      set(${VAR} ${ARGN})
    endif()
  endmacro()
endif()

#
# XSDKDefaults.cmake control variables
#

# USE_XSDK_DEFAULTS: the project-chosen default is FALSE unless the parent
# project set USE_XSDK_DEFAULTS_DEFAULT before including this module.
if ("${USE_XSDK_DEFAULTS_DEFAULT}" STREQUAL "")
  set(USE_XSDK_DEFAULTS_DEFAULT FALSE)
endif()
set(USE_XSDK_DEFAULTS ${USE_XSDK_DEFAULTS_DEFAULT} CACHE BOOL
  "Use XSDK defaults and behavior.")
print_var(USE_XSDK_DEFAULTS)

# All three languages are assumed to be needed unless the parent project
# switched one off beforehand.
set_default(XSDK_ENABLE_C TRUE)
set_default(XSDK_ENABLE_CXX TRUE)
set_default(XSDK_ENABLE_Fortran TRUE)

# Handle the compiler and flags for one language.
#   CMAKE_LANG_NAME       language as used in CMAKE_<LANG>_COMPILER / _FLAGS
#   ENV_LANG_NAME         env var naming the compiler (e.g. CC)
#   ENV_LANG_FLAGS_NAMES  list of env vars that may carry the flags
macro(XSDK_HANDLE_LANG_DEFAULTS  CMAKE_LANG_NAME  ENV_LANG_NAME
  ENV_LANG_FLAGS_NAMES
  )

  # Announce using env var ${ENV_LANG_NAME}
  if (NOT "$ENV{${ENV_LANG_NAME}}" STREQUAL "" AND
    "${CMAKE_${CMAKE_LANG_NAME}_COMPILER}" STREQUAL ""
    )
    message("-- " "XSDK: Setting CMAKE_${CMAKE_LANG_NAME}_COMPILER from env var"
      " ${ENV_LANG_NAME}='$ENV{${ENV_LANG_NAME}}'!")
    set(CMAKE_${CMAKE_LANG_NAME}_COMPILER "$ENV{${ENV_LANG_NAME}}" CACHE FILEPATH
      "XSDK: Set by default from env var ${ENV_LANG_NAME}")
  endif()

  # Announce using env var ${ENV_LANG_FLAGS_NAME}
  foreach(ENV_LANG_FLAGS_NAME ${ENV_LANG_FLAGS_NAMES})
    if (NOT "$ENV{${ENV_LANG_FLAGS_NAME}}" STREQUAL "" AND
      "${CMAKE_${CMAKE_LANG_NAME}_FLAGS}" STREQUAL ""
      )
      message("-- " "XSDK: Setting CMAKE_${CMAKE_LANG_NAME}_FLAGS from env var"
        " ${ENV_LANG_FLAGS_NAME}='$ENV{${ENV_LANG_FLAGS_NAME}}'!")
      set(CMAKE_${CMAKE_LANG_NAME}_FLAGS "$ENV{${ENV_LANG_FLAGS_NAME}} " CACHE STRING
        "XSDK: Set by default from env var ${ENV_LANG_FLAGS_NAME}")
      # NOTE: CMake adds the space after $ENV{${ENV_LANG_FLAGS_NAME}} so we
      # duplicate that here!
    endif()
  endforeach()

endmacro()

#
# Set XSDK Defaults
#

# Set default compilers and flags
if (USE_XSDK_DEFAULTS)

  # Handle env vars for languages C, C++, and Fortran

  if (XSDK_ENABLE_C)
    xsdk_handle_lang_defaults(C CC CFLAGS)
  endif()

  if (XSDK_ENABLE_CXX)
    xsdk_handle_lang_defaults(CXX CXX CXXFLAGS)
  endif()

  if (XSDK_ENABLE_Fortran)
    # FFLAGS and FCFLAGS may both be set only if they agree.
    set(ENV_FFLAGS "$ENV{FFLAGS}")
    set(ENV_FCFLAGS "$ENV{FCFLAGS}")
    if (
      (NOT "${ENV_FFLAGS}" STREQUAL "") AND (NOT "${ENV_FCFLAGS}" STREQUAL "")
      AND
      ("${CMAKE_Fortran_FLAGS}" STREQUAL "")
      )
      if (NOT "${ENV_FFLAGS}" STREQUAL "${ENV_FCFLAGS}")
        message(FATAL_ERROR "Error, env vars FFLAGS='${ENV_FFLAGS}' and"
          " FCFLAGS='${ENV_FCFLAGS}' are both set in the env but are not equal!")
      endif()
    endif()
    xsdk_handle_lang_defaults(Fortran FC "FFLAGS;FCFLAGS")
  endif()

  # Set XSDK defaults for other CMake variables

  if ("${BUILD_SHARED_LIBS}" STREQUAL "")
    message("-- " "XSDK: Setting default BUILD_SHARED_LIBS=TRUE")
    set(BUILD_SHARED_LIBS TRUE CACHE BOOL "Set by default in XSDK mode")
  endif()

  if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
    message("-- " "XSDK: Setting default CMAKE_BUILD_TYPE=DEBUG")
    set(CMAKE_BUILD_TYPE DEBUG CACHE STRING "Set by default in XSDK mode")
  endif()

endif()
SuperLU_DIST_5.3.0/README.md0000644013363400111340000002764113233431301014125 0ustar xiaoyessg# SuperLU_DIST (version 5.3)

[![Build Status](https://travis-ci.org/xiaoyeli/superlu_dist.svg?branch=master)](https://travis-ci.org/xiaoyeli/superlu_dist)
[Nightly tests](http://my.cdash.org/index.php?project=superlu_dist)

SuperLU_DIST contains a set of subroutines to solve a sparse linear system
A*X=B. It uses Gaussian elimination with static pivoting (GESP).
Static pivoting is a technique that combines the numerical stability of
partial pivoting with the scalability of Cholesky (no pivoting),
to run accurately and efficiently on large numbers of processors.

SuperLU_DIST is a parallel extension to the serial SuperLU library.
It is targeted for the distributed memory parallel machines.
SuperLU_DIST is implemented in ANSI C, with MPI for communications. Currently, the LU factorization and triangular solution routines, which are the most time-consuming part of the solution process, are parallelized. The other routines, such as static pivoting and column preordering for sparsity, are performed sequentially. This "alpha" release contains double-precision real and double-precision complex data types. ### The distribution contains the following directory structure: ``` SuperLU_DIST/README instructions on installation SuperLU_DIST/CBLAS/ needed BLAS routines in C, not necessarily fast (NOTE: this version is single threaded. If you use the library with multiple OpenMP threads, performance relies on a good multithreaded BLAS implementation.) SuperLU_DIST/DOC/ the Users' Guide SuperLU_DIST/EXAMPLE/ example programs SuperLU_DIST/INSTALL/ test machine dependent parameters SuperLU_DIST/SRC/ C source code, to be compiled into libsuperlu_dist.a SuperLU_DIST/TEST/ testing code SuperLU_DIST/lib/ contains library archive libsuperlu_dist.a SuperLU_DIST/Makefile top-level Makefile that does installation and testing SuperLU_DIST/make.inc compiler, compiler flags, library definitions and C preprocessor definitions, included in all Makefiles. (You may need to edit it to suit your system before compiling the whole package.) SuperLU_DIST/MAKE_INC/ sample machine-specific make.inc files ``` ## INSTALLATION There are two ways to install the package. One requires users to edit the makefile manually; the other uses the CMake build system. The procedures are described below. ### Installation option 1: Manual installation with makefile. Before installing the package, please examine the following items, which depend on your system setup: 1.1 Edit the make.inc include file. This make include file is referenced inside each of the Makefiles in the various subdirectories. As a result, there is no need to edit the Makefiles in the subdirectories.
All information that is machine specific has been defined in this include file. Sample machine-specific make.inc are provided in the MAKE_INC/ directory for several platforms, such as Cray XT5, Linux, Mac-OS, and CUDA. When you have selected the machine to which you wish to install SuperLU_DIST, copy the appropriate sample include file (if one is present) into make.inc. For example, if you wish to run SuperLU_DIST on a Cray XT5, you can do `cp MAKE_INC/make.xt5 make.inc` For systems other than those listed above, some porting effort is needed for parallel factorization routines. Please refer to the Users' Guide for detailed instructions on porting. The following CPP definitions can be set in CFLAGS. ``` -DXSDK_INDEX_SIZE=64 use 64-bit integers for indexing sparse matrices. (default 32 bit) -DPRNTlevel=[0,1,2,...] printing level to show solver's execution details. (default 0) -DDEBUGlevel=[0,1,2,...] diagnostic printing level for debugging purpose. (default 0) ``` 1.2. The BLAS library. The parallel routines in SuperLU_DIST use some BLAS routines on each MPI process. Moreover, if you enable OpenMP with multiple threads, you need to link with a multithreaded BLAS library. Otherwise performance will be poor. A good public domain BLAS library is OpenBLAS (http://www.openblas.net), which has OpenMP support. If you have a BLAS library on your machine, you may define the following in the file make.inc: ``` BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = <BLAS library you wish to link with> ``` The CBLAS/ subdirectory contains the part of the C BLAS (single threaded) needed by SuperLU_DIST package. However, these codes are intended for use only if there is no faster implementation of the BLAS already available on your machine. In this case, you should go to the top-level SuperLU_DIST/ directory and do the following: 1) In make.inc, undefine (comment out) BLASDEF, and define: `BLASLIB = ../lib/libblas$(PLAT).a` 2) Type: `make blaslib` to make the BLAS library from the routines in the `CBLAS/` subdirectory. 1.3.
External libraries: Metis and ParMetis. If you will use Metis or ParMetis ordering, you will need to install them yourself. Since ParMetis package already contains the source code for the Metis library, you can just download and compile ParMetis from: [http://glaros.dtc.umn.edu/gkhome/metis/parmetis/download](http://glaros.dtc.umn.edu/gkhome/metis/parmetis/download) After you have installed it, you should define the following in make.inc: ``` METISLIB = -L<metis directory> -lmetis PARMETISLIB = -L<parmetis directory> -lparmetis I_PARMETIS = -I<parmetis directory>/include -I<parmetis directory>/metis/include ``` You can disable ParMetis with the following line in SRC/superlu_dist_config.h: ``` #undef HAVE_PARMETIS ``` 1.4. C preprocessor definition CDEFS. In the header file SRC/Cnames.h, we use macros to determine how C routines should be named so that they are callable by Fortran. (Some vendor-supplied BLAS libraries do not have C interfaces. So the re-naming is needed in order for the SuperLU BLAS calls (in C) to interface with the Fortran-style BLAS.) The possible options for CDEFS are: ``` -DAdd_: Fortran expects a C routine to have an underscore postfixed to the name; (This is set as the default) -DNoChange: Fortran expects a C routine name to be identical to that compiled by C; -DUpCase: Fortran expects a C routine name to be all uppercase. ``` 1.5. Multicore and GPU (optional). To use OpenMP parallelism, you need to link with an OpenMP library, and set the number of threads you wish to use as follows (bash): `export OMP_NUM_THREADS=<##>` To enable NVIDIA GPU access, you need to take the following 2 steps: 1) Set the following Linux environment variable: `export ACC=GPU` 2) Add the CUDA library location in make.inc: ``` ifeq "${ACC}" "GPU" CFLAGS += -DGPU_ACC INCS += -I<CUDA directory>/include LIBS += -L<CUDA directory>/lib64 -lcublas -lcudart endif ``` A Makefile is provided in each subdirectory. The installation can be done completely automatically by simply typing "make" at the top level. ### Installation option 2: Using CMake build system.
You will need to create a build tree from which to invoke CMake. First, in order to use the parallel symbolic factorization function, you need to install the ParMETIS parallel ordering package and define the two environment variables: PARMETIS_ROOT and PARMETIS_BUILD_DIR ``` export PARMETIS_ROOT=<Prefix directory of the ParMETIS installation> export PARMETIS_BUILD_DIR=${PARMETIS_ROOT}/build/Linux-x86_64 ``` Then, the installation procedure is the following. From the top level directory, do: ``` mkdir build ; cd build cmake .. \ -DTPL_PARMETIS_LIBRARIES="${PARMETIS_BUILD_DIR}/libparmetis/libparmetis.a;${PARMETIS_BUILD_DIR}/libmetis/libmetis.a" \ -DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${PARMETIS_ROOT}/metis/include" ( Example cmake script: see run_cmake_build.sh export PARMETIS_ROOT=~/lib/dynamic/parmetis-4.0.3 export PARMETIS_BUILD_DIR=${PARMETIS_ROOT}/build/Linux-x86_64 cmake .. \ -DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${PARMETIS_ROOT}/metis/include" \ -DTPL_PARMETIS_LIBRARIES="${PARMETIS_BUILD_DIR}/libparmetis/libparmetis.a;${PARMETIS_BUILD_DIR}/libmetis/libmetis.a" \ -DCMAKE_C_FLAGS="-std=c99 -g" \ -Denable_blaslib=OFF \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_C_COMPILER=mpicc \ -DCMAKE_INSTALL_PREFIX=. ) ``` To actually build, type: `make` To install the libraries, type: `make install` To run the installation test, type: `ctest` (The outputs are in file: `build/Testing/Temporary/LastTest.log`) or, `ctest -D Experimental` or, `ctest -D Nightly` **NOTE:** The parallel execution in ctest is invoked by "mpiexec" command which is from MPICH environment. If your MPI is not MPICH/mpiexec based, the test execution may fail. You can always go to TEST/ directory to perform testing manually. **Note on the C-Fortran name mangling handled by C preprocessor definition:** In the default setting, we assume that Fortran expects a C routine to have an underscore postfixed to the name.
Depending on the compiler, you may need to define one of the following flags during the cmake build to override the default setting: ``` cmake .. -DCMAKE_C_FLAGS="-DNoChange" cmake .. -DCMAKE_C_FLAGS="-DUpCase" ``` ## Windows Usage Prerequisites: CMake, Visual Studio, Microsoft HPC Pack This has been tested with Visual Studio 2017, without Parmetis, without Fortran, and with OpenMP disabled. The cmake configuration line used was ``` '/winsame/contrib-vs2017/cmake-3.9.4-ser/bin/cmake' \ -DCMAKE_INSTALL_PREFIX:PATH=C:/winsame/volatile-vs2017/superlu_dist-master.r147-parcomm \ -DCMAKE_BUILD_TYPE:STRING=Release \ -DCMAKE_COLOR_MAKEFILE:BOOL=FALSE \ -DCMAKE_VERBOSE_MAKEFILE:BOOL=TRUE \ -Denable_openmp:BOOL=FALSE \ -DCMAKE_C_COMPILER:FILEPATH='C:/Program Files (x86)/Microsoft Visual Studio/2017/Professional/VC/Tools/MSVC/14.11.25503/bin/HostX64/x64/cl.exe' \ -DCMAKE_C_FLAGS:STRING='/DWIN32 /D_WINDOWS /W3' \ -Denable_parmetislib:BOOL=FALSE \ -DXSDK_ENABLE_Fortran=OFF \ -G 'NMake Makefiles JOM' \ C:/path/to/superlu_dist ``` After configuring, simply do ``` jom # or nmake jom install # or nmake install ``` Libraries will be installed under C:/winsame/volatile-vs2017/superlu_dist-master.r147-parcomm/lib for the above configuration. If you wish to test: `ctest` ## READING SPARSE MATRIX FILES The SRC/ directory contains the following routines to read different file formats; they all have a similar calling sequence. ``` $ ls -l dread*.c dreadMM.c : Matrix Market, files with suffix .mtx dreadhb.c : Harwell-Boeing, files with suffix .rua dreadrb.c : Rutherford-Boeing, files with suffix .rb dreadtriple.c : triplet, with header dreadtriple_noheader.c : triplet, no header, which is also readable in Matlab ``` ## REFERENCES **[1]** SuperLU_DIST: A Scalable Distributed-Memory Sparse Direct Solver for Unsymmetric Linear Systems. Xiaoye S. Li and James W. Demmel. ACM Trans. on Math. Software, Vol. 29, No. 2, June 2003, pp. 110-140.
**[2]** Parallel Symbolic Factorization for Sparse LU with Static Pivoting. L. Grigori, J. Demmel and X.S. Li. SIAM J. Sci. Comp., Vol. 29, Issue 3, 1289-1314, 2007. **[3]** A distributed CPU-GPU sparse direct solver. P. Sao, R. Vuduc and X.S. Li, Proc. of EuroPar-2014 Parallel Processing, August 25-29, 2014. Porto, Portugal. **Xiaoye S. Li**, Lawrence Berkeley National Lab, [xsli@lbl.gov](xsli@lbl.gov) **Laura Grigori**, INRIA, France, [laura.grigori@inria.fr](laura.grigori@inria.fr) **Piyush Sao**, Georgia Institute of Technology, [piyush.feynman@gmail.com](piyush.feynman@gmail.com) **Ichitaro Yamazaki**, Univ. of Tennessee, [ic.yamazaki@gmail.com](ic.yamazaki@gmail.com) ## RELEASE VERSIONS ``` October 15, 2003   Version 2.0 October 1, 2007   Version 2.1 Feburary 20, 2008 Version 2.2 October 15, 2008   Version 2.3 June 9, 2010 Version 2.4 November 23, 2010 Version 2.5 March 31, 2013 Version 3.3 October 1, 2014 Version 4.0 July 15, 2014 Version 4.1 September 25, 2015 Version 4.2 December 31, 2015 Version 4.3 April 8, 2016 Version 5.0.0 May 15, 2016 Version 5.1.0 October 4, 2016 Version 5.1.1 December 31, 2016 Version 5.1.3 September 30, 2017 Version 5.2.0 January 28, 2018 Version 5.3.0 ``` SuperLU_DIST_5.3.0/CMakeLists.txt0000644013363400111340000002170513233431301015401 0ustar xiaoyessg###################################################################### # # CMakeLists.txt for SUPERLU_DIST # ###################################################################### # Required version cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR) # Project version numbers project(SuperLU_DIST NONE) set(VERSION_MAJOR "5") set(VERSION_MINOR "3") set(VERSION_BugFix "0") set(PROJECT_VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_BugFix}) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") ###################################################################### # # IDEAS: xSDK standards module MESSAGE("\nProcess XSDK defaults ...") # SET(USE_XSDK_DEFAULTS_DEFAULT TRUE) # 
# Set to false if desired

# Pull in the xSDK default handling (compilers/flags from the environment,
# BUILD_SHARED_LIBS and CMAKE_BUILD_TYPE defaults) and CTest support.
INCLUDE("cmake/XSDKDefaults.cmake")

INCLUDE(CTest)

######################################################################

######################################################################
#
# Usual initialization stuff
#
######################################################################
set(CMAKE_INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")

#---- For shared library

# use, i.e. don't skip the full RPATH for the build tree
SET(CMAKE_SKIP_BUILD_RPATH FALSE)

# when building, don't use the install RPATH already
# (but later on when installing)
SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)

# the RPATH to be used when installing
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

# add the automatically determined parts of the RPATH
# which point to directories outside the build tree to the install RPATH
# (this is the only place the variable is set; an identical earlier
# assignment was redundant)
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
#----

SET(BUILD_STATIC_LIBS TRUE CACHE BOOL "Include static libs when building shared")

# PROJECT_NAME_LIB_EXPORT is the library file name recorded for export.
if (BUILD_SHARED_LIBS)
  message("-- SuperLU_DIST will be built as a shared library.")
  set(PROJECT_NAME_LIB_EXPORT libsuperlu_dist.so)
  if (BUILD_STATIC_LIBS)
    message("-- SuperLU_DIST will also be built as a static library.")
  endif()
else()
  message("-- SuperLU_DIST will be built as a static library.")
  set(PROJECT_NAME_LIB_EXPORT libsuperlu_dist.a)
endif()

enable_language (C)
if (XSDK_ENABLE_Fortran)
  enable_language (Fortran)
  set(NOFORTRAN FALSE)
endif()
set(SUPERLU_VERSION "${PROJECT_VERSION}")
set(SUPERLU_REV "${PROJECT_REV}")

# The XSDK standard does not allow using internally built BLAS
if (USE_XSDK_DEFAULTS)
  set(enable_blaslib_DEFAULT OFF)
else()
  set(enable_blaslib_DEFAULT ON)
endif()

if (NOT CMAKE_INSTALL_PREFIX)
  set(CMAKE_INSTALL_PREFIX /usr/local)
endif()

# setup options
option(enable_blaslib "Build the CBLAS library" ${enable_blaslib_DEFAULT})
option(enable_parmetislib "Build the ParMETIS library" ON)
option(enable_doc "Build doxygen documentation" OFF)
option(enable_double "Enable double precision library" ON)
option(enable_complex16 "Enable complex16 precision library" ON)
option(enable_tests "Build tests" ON)
option(enable_examples "Build examples" ON)

# These two hold ';'-separated path lists, not booleans, so they are declared
# as STRING cache entries rather than with option() (which would create a
# BOOL entry defaulting to OFF). Values given with -D on the command line are
# preserved; an unset entry is empty and therefore still tests false.
set(TPL_PARMETIS_LIBRARIES "" CACHE STRING
  "List of absolute paths to ParMETIS link libraries [].")
set(TPL_PARMETIS_INCLUDE_DIRS "" CACHE STRING
  "List of absolute paths to ParMETIS include directories [].")

# GNUInstallDirs supplies the default install sub-directories everywhere
# except under MSVC, where fixed names are used.
if(NOT MSVC)
  include(GNUInstallDirs)
  set(default_install_inc_dir ${CMAKE_INSTALL_INCLUDEDIR})
  set(default_install_lib_dir ${CMAKE_INSTALL_LIBDIR})
  set(default_install_bin_dir ${CMAKE_INSTALL_BINDIR})
else()
  set(default_install_inc_dir "include")
  set(default_install_lib_dir "lib")
  set(default_install_bin_dir "bin")
endif()

set(INSTALL_INC_DIR "${default_install_inc_dir}" CACHE STRING "The folder where headers will be installed.")
set(INSTALL_LIB_DIR "${default_install_lib_dir}" CACHE STRING "The folder where libraries will be installed.")
set(INSTALL_BIN_DIR "${default_install_bin_dir}" CACHE STRING "The folder where runtime files will be installed.")

# Set up required compiler defines and options.
## get_directory_property( DirDefs COMPILE_DEFINITIONS )

# set(CMAKE_C_FLAGS "-DDEBUGlevel=0 -DPRNTlevel=0 ${CMAKE_C_FLAGS}")
if(XSDK_INDEX_SIZE EQUAL 64)
  message("-- Using 64 bit integer for index size")
endif()
set(CMAKE_C_FLAGS_RELEASE "-O3" CACHE STRING "")

######################################################################
#
# Find packages
#
######################################################################
#
#--------------------- MPI ---------------------
find_package(MPI)
if(MPI_C_FOUND)
  set(CMAKE_C_FLAGS "${MPI_C_COMPILE_FLAGS} ${CMAKE_C_FLAGS}")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MPI_C_LINK_FLAGS}" )
endif()

#--------------------- OpenMP ---------------------
# OpenMP is on unless the user explicitly passed -Denable_openmp=...
if (NOT DEFINED enable_openmp)
  set(enable_openmp TRUE)
endif ()
if (enable_openmp)
  find_package(OpenMP)
  ## include(FindOpenMP) # Strumpack uses this
  if(OPENMP_FOUND)
    set(CMAKE_C_FLAGS "${OpenMP_C_FLAGS} ${CMAKE_C_FLAGS}")
    # On edison, OpenMP_EXE_LINKER_FLAGS is empty
    # set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
    message("-- OpenMP_EXE_LINKER_FLAGS='${OpenMP_EXE_LINKER_FLAGS}'")
    message("-- CMAKE_EXE_LINKER_FLAGS='${CMAKE_EXE_LINKER_FLAGS}'")
  endif()
endif()

#--------------------- BLAS ---------------------
# A user-supplied TPL_BLAS_LIBRARIES wins; otherwise FindBLAS is consulted.
# Neither is attempted when the internal CBLAS build is enabled.
if(NOT enable_blaslib)
  # set(TPL_BLAS_LIBRARIES "" CACHE FILEPATH
  #   "Override of list of absolute path to libs for BLAS.")
  if(TPL_BLAS_LIBRARIES)
    set(BLAS_FOUND TRUE)
  else()
    find_package(BLAS)
    if(BLAS_FOUND)
      set(TPL_BLAS_LIBRARIES "${BLAS_LIBRARIES}" CACHE FILEPATH
        "Set from FindBLAS.cmake BLAS_LIBRARIES." FORCE)
    endif()
  endif()
endif()

if(BLAS_FOUND)
  message("-- Using TPL_BLAS_LIBRARIES='${TPL_BLAS_LIBRARIES}'")
  set(CMAKE_C_FLAGS "-DUSE_VENDOR_BLAS ${CMAKE_C_FLAGS}")
  set(BLAS_LIB ${TPL_BLAS_LIBRARIES})
  # fix up BLAS library name
  string (REPLACE ";" " " BLAS_LIB_STR "${BLAS_LIB}")
  set(BLAS_LIB_EXPORT ${BLAS_LIB_STR})
else()
  message("-- Did not find or specify BLAS, so configure to build internal CBLAS ...")
  add_subdirectory(CBLAS)
  set(BLAS_LIB blas)
  if (BUILD_SHARED_LIBS) # export to be referenced by downstream makefile
    set(BLAS_LIB_EXPORT ${CMAKE_INSTALL_PREFIX}/CBLAS/libblas.so)
  else()
    set(BLAS_LIB_EXPORT ${CMAKE_INSTALL_PREFIX}/CBLAS/libblas.a)
  endif()
endif()

#--------------------- ParMETIS ---------------------
if (enable_parmetislib) ## want to use parmetis
  if (NOT TPL_PARMETIS_LIBRARIES)
    message(FATAL_ERROR "TPL_PARMETIS_LIBRARIES option should be set for PARMETIS support to be enabled.")
  endif()

  if (NOT TPL_PARMETIS_INCLUDE_DIRS)
    message(FATAL_ERROR "TPL_PARMETIS_INCLUDE_DIRS option should be set for PARMETIS support to be enabled.")
  endif()
  foreach(dir ${TPL_PARMETIS_INCLUDE_DIRS})
    # Quoted so that a directory containing spaces is tested as one path.
    if (NOT EXISTS "${dir}")
      message(FATAL_ERROR "PARMETIS include directory not found: ${dir}")
    endif()
    set(CMAKE_C_FLAGS "-I${dir} ${CMAKE_C_FLAGS}")
  endforeach()

  message("-- Enabled support for PARMETIS")
  set(PARMETIS_FOUND TRUE)

  set(PARMETIS_LIB ${TPL_PARMETIS_LIBRARIES})
  # fix up PARMETIS library names
  string (REPLACE ";" " " PARMETIS_LIB_STR "${PARMETIS_LIB}")
  set(PARMETIS_LIB_EXPORT ${PARMETIS_LIB_STR})
else()
  message("-- Will not link with ParMETIS.")
endif()

# NOTE(review): with the checks above, enable_parmetislib being true implies
# either a FATAL_ERROR or PARMETIS_FOUND=TRUE, so this fallback looks
# unreachable as written -- confirm whether find_package(ParMETIS) was meant
# to run before the FATAL_ERROR checks.
if(enable_parmetislib AND NOT PARMETIS_FOUND)
  find_package(ParMETIS)
  if(PARMETIS_FOUND)
    set(PARMETIS_LIB ParMETIS::ParMETIS)
    set(TPL_PARMETIS_INCLUDE_DIRS "")
  endif()
endif()

if(PARMETIS_FOUND)
  set(HAVE_PARMETIS TRUE)
endif()

######################################################################
#
# Include directories
#
######################################################################
include_directories(${CMAKE_BINARY_DIR}/SRC) # For superlu_dist_config.h include_directories(${CMAKE_SOURCE_DIR}/SRC) if (TPL_PARMETIS_INCLUDE_DIRS) include_directories(${TPL_PARMETIS_INCLUDE_DIRS}) ## parmetis endif () include_directories(${MPI_C_INCLUDE_PATH}) ###################################################################### # # Add subdirectories # ###################################################################### add_subdirectory(SRC) if(enable_doc) message(FATAL_ERROR "Documentation build requested but not implemented.") #implement doxygen endif() if(enable_tests) enable_testing() add_subdirectory(TEST) endif() if(enable_examples) enable_testing() add_subdirectory(EXAMPLE) endif() # file(WRITE "make.defs" "# can be exposed to users" ${CMAKE_C_COMPILER}) # configure_file(${CMAKE_SOURCE_DIR}/make.inc.in ${CMAKE_BINARY_DIR}/make.inc) configure_file(${SuperLU_DIST_SOURCE_DIR}/make.inc.in ${SuperLU_DIST_SOURCE_DIR}/make.inc) configure_file(${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h.in ${SuperLU_DIST_BINARY_DIR}/SRC/superlu_dist_config.h) configure_file(${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h.in ${SuperLU_DIST_SOURCE_DIR}/SRC/superlu_dist_config.h) # Add pkg-config support configure_file(${CMAKE_CURRENT_SOURCE_DIR}/superlu_dist.pc.in ${CMAKE_CURRENT_BINARY_DIR}/superlu_dist.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/superlu_dist.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) SuperLU_DIST_5.3.0/EXAMPLE/0000755013363400111340000000000013233433411013733 5ustar xiaoyessgSuperLU_DIST_5.3.0/EXAMPLE/pzutil.c0000644013363400111340000004543613233431301015436 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! 
@file * \brief Several matrix utilities * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief Gather A from the distributed compressed row format to global A in compressed column format. */ int pzCompRow_loc_to_CompCol_global ( int_t need_value, /* Input. Whether need to gather numerical values */ SuperMatrix *A, /* Input. Distributed matrix in NRformat_loc format. */ gridinfo_t *grid, /* Input */ SuperMatrix *GA /* Output */ ) { NRformat_loc *Astore; NCformat *GAstore; doublecomplex *a, *a_loc; int_t *colind, *rowptr; int_t *colptr_loc, *rowind_loc; int_t m_loc, n, i, j, k, l; int_t colnnz, fst_row, m_loc_max, nnz_loc, nnz_max, nnz; doublecomplex *a_recv; /* Buffer to receive the blocks of values. */ doublecomplex *a_buf; /* Buffer to merge blocks into block columns. */ int_t *colcnt, *itemp; int_t *colptr_send; /* Buffer to redistribute the column pointers of the local block rows. Use n_loc+1 pointers for each block. */ int_t *colptr_blk; /* The column pointers for each block, after redistribution to the local block columns. Use n_loc+1 pointers for each block. */ int_t *rowind_recv; /* Buffer to receive the blocks of row indices. */ int_t *rowind_buf; /* Buffer to merge blocks into block columns. */ int_t *fst_rows, *n_locs; int *sendcnts, *sdispls, *recvcnts, *rdispls, *itemp_32; int it, n_loc, procs; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzCompRow_loc_to_CompCol_global"); #endif /* Initialization. */ n = A->ncol; Astore = (NRformat_loc *) A->Store; nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; fst_row = Astore->fst_row; a = Astore->nzval; rowptr = Astore->rowptr; colind = Astore->colind; n_loc = m_loc; /* NOTE: CURRENTLY ONLY WORK FOR SQUARE MATRIX */ /* ------------------------------------------------------------ FIRST PHASE: TRANSFORM A INTO DISTRIBUTED COMPRESSED COLUMN. ------------------------------------------------------------*/ zCompRow_to_CompCol_dist(m_loc, n, nnz_loc, a, colind, rowptr, &a_loc, &rowind_loc, &colptr_loc); /* Change local row index numbers to global numbers. 
*/ for (i = 0; i < nnz_loc; ++i) rowind_loc[i] += fst_row; #if ( DEBUGlevel>=2 ) printf("Proc %d\n", grid->iam); PrintInt10("rowind_loc", nnz_loc, rowind_loc); PrintInt10("colptr_loc", n+1, colptr_loc); #endif procs = grid->nprow * grid->npcol; if ( !(fst_rows = (int_t *) intMalloc_dist(2*procs)) ) ABORT("Malloc fails for fst_rows[]"); n_locs = fst_rows + procs; MPI_Allgather(&fst_row, 1, mpi_int_t, fst_rows, 1, mpi_int_t, grid->comm); for (i = 0; i < procs-1; ++i) n_locs[i] = fst_rows[i+1] - fst_rows[i]; n_locs[procs-1] = n - fst_rows[procs-1]; if ( !(recvcnts = SUPERLU_MALLOC(5*procs * sizeof(int))) ) ABORT("Malloc fails for recvcnts[]"); sendcnts = recvcnts + procs; rdispls = sendcnts + procs; sdispls = rdispls + procs; itemp_32 = sdispls + procs; /* All-to-all transfer column pointers of each block. Now the matrix view is P-by-P block-partition. */ /* n column starts for each column, and procs column ends for each block */ if ( !(colptr_send = intMalloc_dist(n + procs)) ) ABORT("Malloc fails for colptr_send[]"); if ( !(colptr_blk = intMalloc_dist( (((size_t) n_loc)+1)*procs)) ) ABORT("Malloc fails for colptr_blk[]"); for (i = 0, j = 0; i < procs; ++i) { for (k = j; k < j + n_locs[i]; ++k) colptr_send[i+k] = colptr_loc[k]; colptr_send[i+k] = colptr_loc[k]; /* Add an END marker */ sendcnts[i] = n_locs[i] + 1; #if ( DEBUGlevel>=1 ) assert(j == fst_rows[i]); #endif sdispls[i] = j + i; recvcnts[i] = n_loc + 1; rdispls[i] = i * (n_loc + 1); j += n_locs[i]; /* First column of next block in colptr_loc[] */ } MPI_Alltoallv(colptr_send, sendcnts, sdispls, mpi_int_t, colptr_blk, recvcnts, rdispls, mpi_int_t, grid->comm); /* Adjust colptr_blk[] so that they contain the local indices of the column pointers in the receive buffer. 
*/ nnz = 0; /* The running sum of the nonzeros counted by far */ k = 0; for (i = 0; i < procs; ++i) { for (j = rdispls[i]; j < rdispls[i] + n_loc; ++j) { colnnz = colptr_blk[j+1] - colptr_blk[j]; /*assert(k<=j);*/ colptr_blk[k] = nnz; nnz += colnnz; /* Start of the next column */ ++k; } colptr_blk[k++] = nnz; /* Add an END marker for each block */ } /*assert(k == (n_loc+1)*procs);*/ /* Now prepare to transfer row indices and values. */ sdispls[0] = 0; for (i = 0; i < procs-1; ++i) { sendcnts[i] = colptr_loc[fst_rows[i+1]] - colptr_loc[fst_rows[i]]; sdispls[i+1] = sdispls[i] + sendcnts[i]; } sendcnts[procs-1] = colptr_loc[n] - colptr_loc[fst_rows[procs-1]]; for (i = 0; i < procs; ++i) { j = rdispls[i]; /* Point to this block in colptr_blk[]. */ recvcnts[i] = colptr_blk[j+n_loc] - colptr_blk[j]; } rdispls[0] = 0; /* Recompute rdispls[] for row indices. */ for (i = 0; i < procs-1; ++i) rdispls[i+1] = rdispls[i] + recvcnts[i]; k = rdispls[procs-1] + recvcnts[procs-1]; /* Total received */ if ( !(rowind_recv = (int_t *) intMalloc_dist(2*k)) ) ABORT("Malloc fails for rowind_recv[]"); rowind_buf = rowind_recv + k; MPI_Alltoallv(rowind_loc, sendcnts, sdispls, mpi_int_t, rowind_recv, recvcnts, rdispls, mpi_int_t, grid->comm); if ( need_value ) { if ( !(a_recv = (doublecomplex *) doublecomplexMalloc_dist(2*k)) ) ABORT("Malloc fails for rowind_recv[]"); a_buf = a_recv + k; MPI_Alltoallv(a_loc, sendcnts, sdispls, SuperLU_MPI_DOUBLE_COMPLEX, a_recv, recvcnts, rdispls, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); } /* Reset colptr_loc[] to point to the n_loc global columns. 
*/ colptr_loc[0] = 0; itemp = colptr_send; for (j = 0; j < n_loc; ++j) { colnnz = 0; for (i = 0; i < procs; ++i) { k = i * (n_loc + 1) + j; /* j-th column in i-th block */ colnnz += colptr_blk[k+1] - colptr_blk[k]; } colptr_loc[j+1] = colptr_loc[j] + colnnz; itemp[j] = colptr_loc[j]; /* Save a copy of the column starts */ } itemp[n_loc] = colptr_loc[n_loc]; /* Merge blocks of row indices into columns of row indices. */ for (i = 0; i < procs; ++i) { k = i * (n_loc + 1); for (j = 0; j < n_loc; ++j) { /* i-th block */ for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) { rowind_buf[itemp[j]] = rowind_recv[l]; ++itemp[j]; } } } if ( need_value ) { for (j = 0; j < n_loc+1; ++j) itemp[j] = colptr_loc[j]; for (i = 0; i < procs; ++i) { k = i * (n_loc + 1); for (j = 0; j < n_loc; ++j) { /* i-th block */ for (l = colptr_blk[k+j]; l < colptr_blk[k+j+1]; ++l) { a_buf[itemp[j]] = a_recv[l]; ++itemp[j]; } } } } /* ------------------------------------------------------------ SECOND PHASE: GATHER TO GLOBAL A IN COMPRESSED COLUMN FORMAT. ------------------------------------------------------------*/ GA->nrow = A->nrow; GA->ncol = A->ncol; GA->Stype = SLU_NC; GA->Dtype = A->Dtype; GA->Mtype = A->Mtype; GAstore = GA->Store = (NCformat *) SUPERLU_MALLOC ( sizeof(NCformat) ); if ( !GAstore ) ABORT ("SUPERLU_MALLOC fails for GAstore"); /* First gather the size of each piece. */ nnz_loc = colptr_loc[n_loc]; MPI_Allgather(&nnz_loc, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); for (i = 0, nnz = 0; i < procs; ++i) nnz += itemp[i]; GAstore->nnz = nnz; if ( !(GAstore->rowind = (int_t *) intMalloc_dist (nnz)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->rowind[]"); if ( !(GAstore->colptr = (int_t *) intMalloc_dist (n+1)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->colptr[]"); /* Allgatherv for row indices. 
*/ rdispls[0] = 0; for (i = 0; i < procs-1; ++i) { rdispls[i+1] = rdispls[i] + itemp[i]; itemp_32[i] = itemp[i]; } itemp_32[procs-1] = itemp[procs-1]; it = nnz_loc; MPI_Allgatherv(rowind_buf, it, mpi_int_t, GAstore->rowind, itemp_32, rdispls, mpi_int_t, grid->comm); if ( need_value ) { if ( !(GAstore->nzval = (doublecomplex *) doublecomplexMalloc_dist (nnz)) ) ABORT ("SUPERLU_MALLOC fails for GAstore->rnzval[]"); MPI_Allgatherv(a_buf, it, SuperLU_MPI_DOUBLE_COMPLEX, GAstore->nzval, itemp_32, rdispls, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); } else GAstore->nzval = NULL; /* Now gather the column pointers. */ rdispls[0] = 0; for (i = 0; i < procs-1; ++i) { rdispls[i+1] = rdispls[i] + n_locs[i]; itemp_32[i] = n_locs[i]; } itemp_32[procs-1] = n_locs[procs-1]; MPI_Allgatherv(colptr_loc, n_loc, mpi_int_t, GAstore->colptr, itemp_32, rdispls, mpi_int_t, grid->comm); /* Recompute column pointers. */ for (i = 1; i < procs; ++i) { k = rdispls[i]; for (j = 0; j < n_locs[i]; ++j) GAstore->colptr[k++] += itemp[i-1]; itemp[i] += itemp[i-1]; /* prefix sum */ } GAstore->colptr[n] = nnz; #if ( DEBUGlevel>=2 ) if ( !grid->iam ) { printf("After pdCompRow_loc_to_CompCol_global()\n"); zPrint_CompCol_Matrix_dist(GA); } #endif SUPERLU_FREE(a_loc); SUPERLU_FREE(rowind_loc); SUPERLU_FREE(colptr_loc); SUPERLU_FREE(fst_rows); SUPERLU_FREE(recvcnts); SUPERLU_FREE(colptr_send); SUPERLU_FREE(colptr_blk); SUPERLU_FREE(rowind_recv); if ( need_value) SUPERLU_FREE(a_recv); #if ( DEBUGlevel>=1 ) if ( !grid->iam ) printf("sizeof(NCformat) %d\n", sizeof(NCformat)); CHECK_MALLOC(grid->iam, "Exit pzCompRow_loc_to_CompCol_global"); #endif return 0; } /* pzCompRow_loc_to_CompCol_global */ /*! \brief Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-th row of B. 
*/ int pzPermute_Dense_Matrix ( int_t fst_row, int_t m_loc, int_t row_to_proc[], int_t perm[], doublecomplex X[], int ldx, doublecomplex B[], int ldb, int nrhs, gridinfo_t *grid ) { int_t i, j, k, l; int p, procs; int *sendcnts, *sendcnts_nrhs, *recvcnts, *recvcnts_nrhs; int *sdispls, *sdispls_nrhs, *rdispls, *rdispls_nrhs; int *ptr_to_ibuf, *ptr_to_dbuf; int_t *send_ibuf, *recv_ibuf; doublecomplex *send_dbuf, *recv_dbuf; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzPermute_Dense_Matrix()"); #endif procs = grid->nprow * grid->npcol; if ( !(sendcnts = SUPERLU_MALLOC(10*procs * sizeof(int))) ) ABORT("Malloc fails for sendcnts[]."); sendcnts_nrhs = sendcnts + procs; recvcnts = sendcnts_nrhs + procs; recvcnts_nrhs = recvcnts + procs; sdispls = recvcnts_nrhs + procs; sdispls_nrhs = sdispls + procs; rdispls = sdispls_nrhs + procs; rdispls_nrhs = rdispls + procs; ptr_to_ibuf = rdispls_nrhs + procs; ptr_to_dbuf = ptr_to_ibuf + procs; for (i = 0; i < procs; ++i) sendcnts[i] = 0; /* Count the number of X entries to be sent to each process.*/ for (i = fst_row; i < fst_row + m_loc; ++i) { p = row_to_proc[perm[i]]; ++sendcnts[p]; } MPI_Alltoall(sendcnts, 1, MPI_INT, recvcnts, 1, MPI_INT, grid->comm); sdispls[0] = rdispls[0] = 0; sdispls_nrhs[0] = rdispls_nrhs[0] = 0; sendcnts_nrhs[0] = sendcnts[0] * nrhs; recvcnts_nrhs[0] = recvcnts[0] * nrhs; for (i = 1; i < procs; ++i) { sdispls[i] = sdispls[i-1] + sendcnts[i-1]; sdispls_nrhs[i] = sdispls[i] * nrhs; rdispls[i] = rdispls[i-1] + recvcnts[i-1]; rdispls_nrhs[i] = rdispls[i] * nrhs; sendcnts_nrhs[i] = sendcnts[i] * nrhs; recvcnts_nrhs[i] = recvcnts[i] * nrhs; } k = sdispls[procs-1] + sendcnts[procs-1];/* Total number of sends */ l = rdispls[procs-1] + recvcnts[procs-1];/* Total number of recvs */ /*assert(k == m_loc);*/ /*assert(l == m_loc);*/ if ( !(send_ibuf = intMalloc_dist(k + l)) ) ABORT("Malloc fails for send_ibuf[]."); recv_ibuf = send_ibuf + k; if ( !(send_dbuf = doublecomplexMalloc_dist((k + l)*nrhs)) ) 
ABORT("Malloc fails for send_dbuf[]."); recv_dbuf = send_dbuf + k * nrhs; for (i = 0; i < procs; ++i) { ptr_to_ibuf[i] = sdispls[i]; ptr_to_dbuf[i] = sdispls_nrhs[i]; } /* Fill in the send buffers: send_ibuf[] and send_dbuf[]. */ for (i = fst_row; i < fst_row + m_loc; ++i) { j = perm[i]; p = row_to_proc[j]; send_ibuf[ptr_to_ibuf[p]] = j; j = ptr_to_dbuf[p]; RHS_ITERATE(k) { /* RHS stored in row major in the buffer */ send_dbuf[j++] = X[i-fst_row + k*ldx]; } ++ptr_to_ibuf[p]; ptr_to_dbuf[p] += nrhs; } /* Transfer the (permuted) row indices and numerical values. */ MPI_Alltoallv(send_ibuf, sendcnts, sdispls, mpi_int_t, recv_ibuf, recvcnts, rdispls, mpi_int_t, grid->comm); MPI_Alltoallv(send_dbuf, sendcnts_nrhs, sdispls_nrhs, SuperLU_MPI_DOUBLE_COMPLEX, recv_dbuf, recvcnts_nrhs, rdispls_nrhs, SuperLU_MPI_DOUBLE_COMPLEX, grid->comm); /* Copy the buffer into b. */ for (i = 0, l = 0; i < m_loc; ++i) { j = recv_ibuf[i] - fst_row; /* Relative row number */ RHS_ITERATE(k) { /* RHS stored in row major in the buffer */ B[j + k*ldb] = recv_dbuf[l++]; } } SUPERLU_FREE(sendcnts); SUPERLU_FREE(send_ibuf); SUPERLU_FREE(send_dbuf); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Exit pzPermute_Dense_Matrix()"); #endif return 0; } /* pzPermute_Dense_Matrix */ /*! \brief Initialize the data structure for the solution phase. 
*/ int zSolveInit(superlu_options_t *options, SuperMatrix *A, int_t perm_r[], int_t perm_c[], int_t nrhs, LUstruct_t *LUstruct, gridinfo_t *grid, SOLVEstruct_t *SOLVEstruct) { int_t *row_to_proc, *inv_perm_c, *itemp; NRformat_loc *Astore; int_t i, fst_row, m_loc, p; int procs; Astore = (NRformat_loc *) A->Store; fst_row = Astore->fst_row; m_loc = Astore->m_loc; procs = grid->nprow * grid->npcol; if ( !grid->iam ) printf("@@@ enter zSolveInit, A->nrow %d\n", A->nrow); if ( !(row_to_proc = intMalloc_dist(A->nrow)) ) ABORT("Malloc fails for row_to_proc[]"); if ( !grid->iam ) { printf("@@@ malloc(1) zSolveInit\n"); fflush(stdout); } SOLVEstruct->row_to_proc = row_to_proc; if ( !(inv_perm_c = intMalloc_dist(A->ncol)) ) ABORT("Malloc fails for inv_perm_c[]."); if ( !grid->iam ) { printf("@@@ malloc(2) zSolveInit\n"); fflush(stdout); } for (i = 0; i < A->ncol; ++i) inv_perm_c[perm_c[i]] = i; SOLVEstruct->inv_perm_c = inv_perm_c; if ( !grid->iam ) printf("@@@ after malloc zSolveInit\n"); /* ------------------------------------------------------------ EVERY PROCESS NEEDS TO KNOW GLOBAL PARTITION. SET UP THE MAPPING BETWEEN ROWS AND PROCESSES. NOTE: For those processes that do not own any row, it must must be set so that fst_row == A->nrow. ------------------------------------------------------------*/ if ( !(itemp = intMalloc_dist(procs+1)) ) ABORT("Malloc fails for itemp[]"); MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); itemp[procs] = A->nrow; for (p = 0; p < procs; ++p) { for (i = itemp[p] ; i < itemp[p+1]; ++i) row_to_proc[i] = p; } if ( !grid->iam ) printf("@@@ after allgather zSolveInit\n"); #define DEBUGlevel 2 #if ( DEBUGlevel>=2 ) if ( !grid->iam ) { printf("fst_row = %d\n", fst_row); PrintInt10("row_to_proc", A->nrow, row_to_proc); PrintInt10("inv_perm_c", A->ncol, inv_perm_c); } #endif SUPERLU_FREE(itemp); #if 0 /* Compute the mapping between rows and processes. 
*/ /* XSL NOTE: What happens if # of mapped processes is smaller than total Procs? For the processes without any row, let fst_row be EMPTY (-1). Make sure this case works! */ MPI_Allgather(&fst_row, 1, mpi_int_t, itemp, 1, mpi_int_t, grid->comm); itemp[procs] = n; for (p = 0; p < procs; ++p) { j = itemp[p]; if ( j != EMPTY ) { k = itemp[p+1]; if ( k == EMPTY ) k = n; for (i = j ; i < k; ++i) row_to_proc[i] = p; } } #endif get_diag_procs(A->ncol, LUstruct->Glu_persist, grid, &SOLVEstruct->num_diag_procs, &SOLVEstruct->diag_procs, &SOLVEstruct->diag_len); if ( !(SOLVEstruct->gstrs_comm = (pxgstrs_comm_t *) SUPERLU_MALLOC(sizeof(pxgstrs_comm_t))) ) ABORT("Malloc fails for gstrs_comm[]"); pxgstrs_init(A->ncol, m_loc, nrhs, fst_row, perm_r, perm_c, grid, LUstruct->Glu_persist, SOLVEstruct); if ( !(SOLVEstruct->gsmv_comm = (pzgsmv_comm_t *) SUPERLU_MALLOC(sizeof(pzgsmv_comm_t))) ) ABORT("Malloc fails for gsmv_comm[]"); SOLVEstruct->A_colind_gsmv = NULL; options->SolveInitialized = YES; return 0; } /* zSolveInit */ /*! \brief Release the resources used for the solution phase. */ void zSolveFinalize(superlu_options_t *options, SOLVEstruct_t *SOLVEstruct) { int_t *it; pxgstrs_finalize(SOLVEstruct->gstrs_comm); if ( options->RefineInitialized ) { pzgsmv_finalize(SOLVEstruct->gsmv_comm); options->RefineInitialized = NO; } SUPERLU_FREE(SOLVEstruct->gsmv_comm); SUPERLU_FREE(SOLVEstruct->row_to_proc); SUPERLU_FREE(SOLVEstruct->inv_perm_c); SUPERLU_FREE(SOLVEstruct->diag_procs); SUPERLU_FREE(SOLVEstruct->diag_len); if ( it = SOLVEstruct->A_colind_gsmv ) SUPERLU_FREE(it); options->SolveInitialized = NO; } /* zSolveFinalize */ /*! 
\brief Check the inf-norm of the error vector */ void pzinf_norm_error(int iam, int_t n, int_t nrhs, doublecomplex x[], int_t ldx, doublecomplex xtrue[], int_t ldxtrue, gridinfo_t *grid) { double err, xnorm, temperr, tempxnorm; doublecomplex *x_work, *xtrue_work; doublecomplex temp; int i, j; for (j = 0; j < nrhs; j++) { x_work = &x[j*ldx]; xtrue_work = &xtrue[j*ldxtrue]; err = xnorm = 0.0; for (i = 0; i < n; i++) { z_sub(&temp, &x_work[i], &xtrue_work[i]); err = SUPERLU_MAX(err, slud_z_abs(&temp)); xnorm = SUPERLU_MAX(xnorm, slud_z_abs(&x_work[i])); } /* get the golbal max err & xnrom */ temperr = err; tempxnorm = xnorm; MPI_Allreduce( &temperr, &err, 1, MPI_DOUBLE, MPI_MAX, grid->comm); MPI_Allreduce( &tempxnorm, &xnorm, 1, MPI_DOUBLE, MPI_MAX, grid->comm); err = err / xnorm; if ( !iam ) printf("\tSol %2d: ||X-Xtrue||/||X|| = %e\n", j, err); } } SuperLU_DIST_5.3.0/EXAMPLE/Makefile0000644013363400111340000001052013233431301015365 0ustar xiaoyessg####################################################################### # # This makefile creates the example programs for the linear equation # routines in SuperLU_DIST. # # The command # make # without any arguments creates all the example programs. # The command # make double # creates double precision real example programs. # The command # make complex16 # creates double precision complex example programs. 
# # The executable files are called # double real: pddrive pddrive_ABglobal pddrive1 # pddrive1_ABglobal pddrive2 pddrive3 pddrive4 # double complex: pzdrive pzdrive_ABglobal pzdrive1 # pzdrive1_ABglobal pzdrive2 pzdrive3 pzdrive4 # # Alternatively, you can create example programs individually by # typing the command (for example) # make pddrive # # To remove the object files after the executable files have been # created, enter # make clean # ####################################################################### include ../make.inc INCLUDEDIR = -I../SRC DEXM = pddrive.o dcreate_matrix.o sp_ienv.o #pdgstrf2.o #pdgssvx.o # pdgstrs_lsum_X1.o pdgstrf_X1.o DEXM1 = pddrive1.o dcreate_matrix.o DEXM2 = pddrive2.o dcreate_matrix.o dcreate_matrix_perturbed.o DEXM3 = pddrive3.o dcreate_matrix.o DEXM4 = pddrive4.o dcreate_matrix.o DEXMG = pddrive_ABglobal.o DEXMG1 = pddrive1_ABglobal.o DEXMG2 = pddrive2_ABglobal.o DEXMG3 = pddrive3_ABglobal.o DEXMG4 = pddrive4_ABglobal.o ZEXM = pzdrive.o zcreate_matrix.o #pzgstrf2.o pzgstrf_v3.3.o pzgstrf.o ZEXM1 = pzdrive1.o zcreate_matrix.o ZEXM2 = pzdrive2.o zcreate_matrix.o zcreate_matrix_perturbed.o ZEXM3 = pzdrive3.o zcreate_matrix.o ZEXM4 = pzdrive4.o zcreate_matrix.o ZEXMG = pzdrive_ABglobal.o ZEXMG1 = pzdrive1_ABglobal.o ZEXMG2 = pzdrive2_ABglobal.o ZEXMG3 = pzdrive3_ABglobal.o ZEXMG4 = pzdrive4_ABglobal.o all: double complex16 double: pddrive pddrive1 pddrive2 pddrive3 pddrive4 \ pddrive_ABglobal pddrive1_ABglobal pddrive2_ABglobal \ pddrive3_ABglobal pddrive4_ABglobal complex16: pzdrive pzdrive1 pzdrive2 pzdrive3 pzdrive4 \ pzdrive_ABglobal pzdrive1_ABglobal pzdrive2_ABglobal \ pzdrive3_ABglobal pzdrive4_ABglobal pddrive: $(DEXM) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXM) $(LIBS) -lm -o $@ pddrive1: $(DEXM1) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXM1) $(LIBS) -lm -o $@ pddrive2: $(DEXM2) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXM2) $(LIBS) -lm -o $@ pddrive3: $(DEXM3) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXM3) $(LIBS) -lm 
-o $@ pddrive4: $(DEXM4) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXM4) $(LIBS) -lm -o $@ pddrive_ABglobal: $(DEXMG) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXMG) $(LIBS) -lm -o $@ pddrive1_ABglobal: $(DEXMG1) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXMG1) $(LIBS) -lm -o $@ pddrive2_ABglobal: $(DEXMG2) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXMG2) $(LIBS) -lm -o $@ pddrive3_ABglobal: $(DEXMG3) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXMG3) $(LIBS) -lm -o $@ pddrive4_ABglobal: $(DEXMG4) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(DEXMG4) $(LIBS) -lm -o $@ pzdrive: $(ZEXM) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXM) $(LIBS) -lm -o $@ pzdrive_triple: $(ZEXM) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXM) $(LIBS) -lm -o $@ pzdrive1: $(ZEXM1) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXM1) $(LIBS) -lm -o $@ pzdrive2: $(ZEXM2) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXM2) $(LIBS) -lm -o $@ pzdrive3: $(ZEXM3) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXM3) $(LIBS) -lm -o $@ pzdrive4: $(ZEXM4) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXM4) $(LIBS) -lm -o $@ pzdrive_ABglobal: $(ZEXMG) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXMG) $(LIBS) -lm -o $@ pzdrive1_ABglobal: $(ZEXMG1) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXMG1) $(LIBS) -lm -o $@ pzdrive2_ABglobal: $(ZEXMG2) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXMG2) $(LIBS) -lm -o $@ pzdrive3_ABglobal: $(ZEXMG3) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXMG3) $(LIBS) -lm -o $@ pzdrive4_ABglobal: $(ZEXMG4) $(DSUPERLULIB) $(LOADER) $(LOADOPTS) $(ZEXMG4) $(LIBS) -lm -o $@ #pdgstrf.o: dscatter.c dSchCompUdt-cuda.c pdgstrf.c # $(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) $(INCLUDEDIR) -c pdgstrf.c $(VERBOSE) .c.o: $(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) $(INCLUDEDIR) -c $< $(VERBOSE) .f.o: $(FORTRAN) $(FFLAGS) -c $< $(VERBOSE) clean: rm -f *.o p[dz]drive p[dz]drive[1-9] \ p[dz]drive_ABglobal p[dz]drive[1-9]_ABglobal SuperLU_DIST_5.3.0/EXAMPLE/zcreate_matrix_perturbed.c0000644013363400111340000001574413233431301021203 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read the matrix from data file * *
 * -- Distributed SuperLU routine (version 5.1.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * December 31, 2016
 * 
*/ #include #include "superlu_zdefs.h" /* \brief * *
 * Purpose
 * =======
 * 
 * ZCREATE_MATRIX_PERTURBED reads the matrix from a data file in
 * Harwell-Boeing format, and distributes it to the processes in a distributed
 * compressed row format. It also generates the distributed true solution X
 * and the right-hand side RHS.
 *
 * Arguments   
 * =========      
 *
 * A     (output) SuperMatrix*
 *       Local matrix A in NR_loc format. 
 *
 * NRHS  (input) int
 *       Number of right-hand sides.
 *
 * RHS   (output) doublecomplex**
 *       The right-hand side matrix.
 *
 * LDB   (output) int*
 *       Leading dimension of the right-hand side matrix.
 *
 * X     (output) doublecomplex**
 *       The true solution matrix.
 *
 * LDX   (output) int*
 *       The leading dimension of the true solution matrix.
 *
 * FP    (input) FILE*
 *       The matrix file pointer.
 *
 * GRID  (input) gridinfo_t*
 *       The 2D process mesh.
 * 
*/ int zcreate_matrix_perturbed(SuperMatrix *A, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, gridinfo_t *grid) { SuperMatrix GA; /* global A */ doublecomplex *b_global, *xtrue_global; /* replicated on all processes */ int_t *rowind, *colptr; /* global */ doublecomplex *nzval; /* global */ doublecomplex *nzval_loc; /* local */ int_t *colind, *rowptr; /* local */ int_t m, n, nnz; int_t m_loc, fst_row, nnz_loc; int_t m_loc_fst; /* Record m_loc of the first p-1 processors, when mod(m, p) is not zero. */ int_t row, col, i, j, relpos; int iam; char trans[1]; int_t *marker; iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter zcreate_matrix()"); #endif if ( !iam ) { /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &nzval, &rowind, &colptr); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( nzval, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); /* Allocate storage for compressed column representation. */ zallocateA_dist(n, nnz, &nzval, &rowind, &colptr); MPI_Bcast( nzval, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } /* Perturbed the 1st and last diagonal of the matrix to lower values. Intention is to change perm_r[]. 
*/ nzval[0].r *= 0.01; nzval[0].i *= 0.01; nzval[nnz-1].r *= 0.0001; nzval[nnz-1].i *= 0.0001; /* Compute the number of rows to be distributed to local process */ m_loc = m / (grid->nprow * grid->npcol); m_loc_fst = m_loc; /* When m / procs is not an integer */ if ((m_loc * grid->nprow * grid->npcol) != m) { /*m_loc = m_loc+1; m_loc_fst = m_loc;*/ if (iam == (grid->nprow * grid->npcol - 1)) /* last proc. gets all*/ m_loc = m - m_loc * (grid->nprow * grid->npcol - 1); } /* Create compressed column matrix for GA. */ zCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if ( !(b_global = doublecomplexMalloc_dist(m*nrhs)) ) ABORT("Malloc fails for b[]"); if ( !(xtrue_global = doublecomplexMalloc_dist(n*nrhs)) ) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; zGenXtrue_dist(n, nrhs, xtrue_global, n); zFillRHS_dist(trans, nrhs, xtrue_global, n, &GA, b_global, m); /************************************************* * Change GA to a local A with NR_loc format * *************************************************/ rowptr = (int_t *) intMalloc_dist(m_loc+1); marker = (int_t *) intCalloc_dist(n); /* Get counts of each row of GA */ for (i = 0; i < n; ++i) for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]]; /* Set up row pointers */ rowptr[0] = 0; fst_row = iam * m_loc_fst; nnz_loc = 0; for (j = 0; j < m_loc; ++j) { row = fst_row + j; rowptr[j+1] = rowptr[j] + marker[row]; marker[j] = rowptr[j]; } nnz_loc = rowptr[m_loc]; nzval_loc = (doublecomplex *) doublecomplexMalloc_dist(nnz_loc); colind = (int_t *) intMalloc_dist(nnz_loc); /* Transfer the matrix into the compressed row storage */ for (i = 0; i < n; ++i) { for (j = colptr[i]; j < colptr[i+1]; ++j) { row = rowind[j]; if ( (row>=fst_row) && (row=2 ) if ( !iam ) zPrint_CompCol_Matrix_dist(&GA); #endif /* Destroy GA */ Destroy_CompCol_Matrix_dist(&GA); /******************************************************/ /* 
Change GA to a local A with NR_loc format */ /******************************************************/ /* Set up the local A in NR_loc format */ zCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, nzval_loc, colind, rowptr, SLU_NR_loc, SLU_Z, SLU_GE); /* Get the local B */ if ( !((*rhs) = doublecomplexMalloc_dist(m_loc*nrhs)) ) ABORT("Malloc fails for rhs[]"); for (j =0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) { row = fst_row + i; (*rhs)[j*m_loc+i] = b_global[j*n+row]; } } *ldb = m_loc; /* Set the true X */ *ldx = m_loc; if ( !((*x) = doublecomplexMalloc_dist(*ldx * nrhs)) ) ABORT("Malloc fails for x_loc[]"); /* Get the local part of xtrue_global */ for (j = 0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) (*x)[i + j*(*ldx)] = xtrue_global[i + fst_row + j*n]; } SUPERLU_FREE(b_global); SUPERLU_FREE(xtrue_global); SUPERLU_FREE(marker); #if ( DEBUGlevel>=1 ) printf("sizeof(NRforamt_loc) %lu\n", sizeof(NRformat_loc)); CHECK_MALLOC(iam, "Exit zcreate_matrix()"); #endif return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pzdrive.c0000644013363400111340000001523313233431301015562 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for PZGSSVX example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * November 1, 2007
 * April 5, 2015
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PZDRIVE.
 *
 * This example illustrates how to use PZGSSVX with the full
 * (default) options to solve a linear system.
 * 
 * Five basic steps are required:
 *   1. Initialize the MPI environment and the SuperLU process grid
 *   2. Set up the input matrix and the right-hand side
 *   3. Set the options argument
 *   4. Call pzgssvx
 *   5. Release the process grid and terminate the MPI environment
 *
 * With MPICH,  program may be run by typing:
 *    mpiexec -n <np> pzdrive -r <proc rows> -c <proc columns> big.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid; double *berr; doublecomplex *b, *xtrue; int m, n; int nprow, npcol; int iam, info, ldb, ldx, nrhs; char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %4d)\n", nprow); printf("\t-c : process columns (default %4d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; if ( !iam ) { int v_major, v_minor, v_bugfix; superlu_dist_GetVersionNumber(&v_major, &v_minor, &v_bugfix); printf("Library version:\t%d.%d.%d\n", v_major, v_minor, v_bugfix); printf("Input matrix file:\t%s\n", *cpp); printf("Process grid:\t\t%d X %d\n", (int)grid.nprow, (int)grid.npcol); fflush(stdout); } #if ( VAMPIR>=1 ) VT_traceoff(); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. 
------------------------------------------------------------*/ zcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ParSymbFact = NO; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.IterRefine = DOUBLE; options.Trans = NOTRANS; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); #if 0 options.RowPerm = NOROWPERM; options.IterRefine = NOREFINE; options.ColPerm = NATURAL; options.Equil = NO; options.ReplaceTinyPivot = YES; #endif if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } m = A.nrow; n = A.ncol; /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pzgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pzinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. */ /* ------------------------------------------------------------ DEALLOCATE STORAGE. 
------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); ScalePermstructFree(&ScalePermstruct); Destroy_LU(n, &grid, &LUstruct); LUstructFree(&LUstruct); if ( options.SolveInitialized ) { zSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pddrive3_ABglobal.c0000644013363400111340000002362713233431301017350 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for pdgssvx_ABglobal example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * April 5, 2015
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pddrive3_ABglobal.
 *
 * This example illustrates how to use pdgssvx_ABglobal to solve
 * systems repeatedly with the same sparsity pattern and similar
 * numerical values of matrix A.
 * In this case, the column permutation vector and symbolic factorization are
 * computed only once. The following data structures will be reused in the
 * subsequent call to pdgssvx_ABglobal:
 *        ScalePermstruct : DiagScale, R, C, perm_r, perm_c
 *        LUstruct        : etree, Glu_persist, Llu
 *
 * NOTE:
 * The distributed nonzero structures of L and U remain the same,
 * although the numerical values are different. So 'Llu' is set up once
 * in the first call to pdgssvx_ABglobal, and reused in the subsequent call.
 *
 * On an IBM SP, the program may be run by typing:
 *    poe pddrive3_ABglobal -r <proc rows> -c <proc columns> <input_file> -procs <total procs>

*

*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; double *a, *a1, *b, *b1, *xtrue; int_t *asub, *asub1, *xa, *xa1; int_t i, j, m, n, nnz; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp, *fopen(); extern int cpp_defs(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. 
*/ cpp_defs(); /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("Input matrix file: %s\n", *cpp); printf("\tDimension\t%dx%d\t # nonzeros %d\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); /* Allocate storage for compressed column representation. */ dallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } /* Create compressed column matrix for A. */ dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doubleMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; dGenXtrue_dist(n, nrhs, xtrue, ldx); dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); /* Save a copy of the right-hand side. */ if ( !(b1 = doubleMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]"); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* Save a copy of the matrix A. 
*/ dallocateA_dist(n, nnz, &a1, &asub1, &xa1); for (i = 0; i < nnz; ++i) { a1[i] = a[i]; asub1[i] = asub[i]; } for (i = 0; i < n+1; ++i) xa1[i] = xa[i]; /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid); } PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); /* Deallocate storage of matrix A. */ SUPERLU_FREE(b); /* Free storage of right-hand side. */ /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM. THE MATRIX A HAS THE SAME SPARSITY PATTERN AND THE SIMILAR NUMERICAL VALUES AS THAT IN A PREVIOUS SYSTEM. ------------------------------------------------------------*/ options.Fact = SamePattern_SameRowPerm; PStatInit(&stat); /* Initialize the statistics variables. */ /* Create compressed column matrix for A. */ dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a1, asub1, xa1, SLU_NC, SLU_D, SLU_GE); /* Solve the linear system. 
*/ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { printf("Solve a system with the same pattern and similar values.\n"); dinf_norm_error_dist(n, nrhs, b1, ldb, xtrue, ldx, &grid); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); /* Deallocate the structure of L and U.*/ SUPERLU_FREE(b1); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue); /* Free storage of the exact solution. */ SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/dreadtriple.c0000644013363400111340000001017713233431301016400 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * */ #include #include "superlu_ddefs.h" #undef EXPAND_SYM /*! brief * *
 * Output parameters
 * =================
 *   (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of
 *      nonzeros in columns of matrix A; (*nzval)[*] the numerical values;
 *      column i of A is given by (*nzval)[k], k = (*colptr)[i],...,
 *      (*colptr)[i+1]-1.
 * 
*/ void dreadtriple(FILE *fp, int_t *m, int_t *n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { int_t i, j, k, jsize, lasta, nnz, nz, new_nonz; double *a, *val; int_t *asub, *xa, *row, *col; int_t zero_base = 0; /* File format: * First line: #rows #non-zero * Triplet in the rest of lines: * row col value */ /*fscanf(fp, "%d%d%d", m, n, nonz);*/ #ifdef _LONGINT fscanf(fp, "%ld%ld", n, nonz); #else fscanf(fp, "%d%d", n, nonz); #endif #ifdef EXPAND_SYM new_nonz = 2 * *nonz - *n; #else new_nonz = *nonz; #endif *m = *n; printf("m %ld, n %ld, nonz %ld\n", *m, *n, *nonz); dallocateA_dist(*n, new_nonz, nzval, rowind, colptr); /* Allocate storage */ a = *nzval; asub = *rowind; xa = *colptr; if ( !(val = (double *) SUPERLU_MALLOC(new_nonz * sizeof(double))) ) ABORT("Malloc fails for val[]"); if ( !(row = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for row[]"); if ( !(col = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for col[]"); for (j = 0; j < *n; ++j) xa[j] = 0; /* Read into the triplet array from a file */ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { #ifdef _LONGINT fscanf(fp, "%ld%ld%lf\n", &row[nz], &col[nz], &val[nz]); #else fscanf(fp, "%d%d%lf\n", &row[nz], &col[nz], &val[nz]); #endif if ( nnz == 0 ) /* first nonzero */ if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else printf("triplet file: row/col indices are one-based.\n"); if ( !zero_base ) { /* Change to 0-based indexing. 
*/ --row[nz]; --col[nz]; } if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n /*|| val[nz] == 0.*/) { fprintf(stderr, "nz %d, (%d, %d) = %e out of bound, removed\n", nz, row[nz], col[nz], val[nz]); exit(-1); } else { ++xa[col[nz]]; #ifdef EXPAND_SYM if ( row[nz] != col[nz] ) { /* Excluding diagonal */ ++nz; row[nz] = col[nz-1]; col[nz] = row[nz-1]; val[nz] = val[nz-1]; ++xa[col[nz]]; } #endif ++nz; } } *nonz = nz; #ifdef EXPAND_SYM printf("new_nonz after symmetric expansion:\t%d\n", *nonz); #endif /* Initialize the array of column pointers */ k = 0; jsize = xa[0]; xa[0] = 0; for (j = 1; j < *n; ++j) { k += jsize; jsize = xa[j]; xa[j] = k; } /* Copy the triplets into the column oriented storage */ for (nz = 0; nz < *nonz; ++nz) { j = col[nz]; k = xa[j]; asub[k] = row[nz]; a[k] = val[nz]; ++xa[j]; } /* Reset the column pointers to the beginning of each column */ for (j = *n; j > 0; --j) xa[j] = xa[j-1]; xa[0] = 0; SUPERLU_FREE(val); SUPERLU_FREE(row); SUPERLU_FREE(col); #ifdef CHK_INPUT for (i = 0; i < *n; i++) { printf("Col %d, xa %d\n", i, xa[i]); for (k = xa[i]; k < xa[i+1]; k++) printf("%d\t%16.10f\n", asub[k], a[k]); } #endif } void dreadrhs(int m, double *b) { FILE *fp, *fopen(); int i, j; if ( !(fp = fopen("b.dat", "r")) ) { fprintf(stderr, "dreadrhs: file does not exist\n"); exit(-1); } for (i = 0; i < m; ++i) fscanf(fp, "%lf\n", &b[i]); /*fscanf(fp, "%d%lf\n", &j, &b[i]);*/ /* readpair_(j, &b[i]);*/ fclose(fp); } SuperLU_DIST_5.3.0/EXAMPLE/README0000644013363400111340000000424213233431301014611 0ustar xiaoyessg SuperLU_DIST EXAMPLES ====================== This directory contains sample programs to illustrate how to use various functions provided in SuperLU_DIST. You can modify these examples to suit your applications. The examples illustrate the following functionalities: 1. pddrive.c, pddrive_ABglobal.c Use PDGSSVX with the full (default) options to solve a linear system. 2. 
pddrive1.c, pddrive1_ABglobal.c Solve the systems with the same A but different right-hand sides. (Reuse the factored form of A) 3. pddrive2.c, pddrive2_ABglobal.c Solve the systems with the same sparsity pattern of A. (Reuse the sparsity ordering) 4. pddrive3.c, pddrive3_ABglobal.c Solve the systems with the same sparsity pattern and similar values. 5. pddrive4.c, pddrive4_ABglobal.c Divide the processors into two subgroups (two grids) such that each subgroup solves a linear system independently from the other. The command line options "-r <process rows>" and "-c <process columns>" define the 2-D process grid. The total number of processes <procs> is: <procs> = <process rows> * <process columns> If these options are not provided at the command line, the programs will use 1 processor as default in each case. Three input matrices (Harwell-Boeing format) are provided in this directory: g20.rua -- a real matrix of dimension 400x400 big.rua -- a real matrix of dimension 4960x4960 cg20.cua -- a complex matrix of dimension 400x400 The command lines given below show how to run the parallel programs using "mpiexec". You may need to replace mpiexec by a platform-specific command. 1. To run the real examples (pddrive, pddrive1, etc.) you may type: % mpiexec -n <procs> pddrive -r <proc rows> -c <proc columns> g20.rua (e.g., mpiexec -n 4 pddrive -r 2 -c 2 g20.rua) 2. To run the real examples pddrive4 and pddrive4_ABglobal, you may type: % mpiexec -n 10 pddrive4 g20.rua 3. To run the complex examples (pzdrive, pzdrive1, etc.), you may type: % mpiexec -n <procs> pzdrive -r <proc rows> -c <proc columns> cg20.cua 4. To run the complex examples pzdrive4 and pzdrive4_ABglobal, you may type: % mpiexec -n 10 pzdrive4 cg20.cua SuperLU_DIST_5.3.0/EXAMPLE/pddrive4_ABglobal.c0000644013363400111340000002740313233431301017345 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief This example illustrates how to divide up the processes into subgroups * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * April 5, 2015
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pddrive4_ABglobal.
 *
 * This example illustrates how to divide up the processes into
 * subgroups (multiple grids) such that each subgroup solves a linear
 * system independently from the other.
 *
 * In this example, there are 2 subgroups:
 *  1. subgroup 1 consists of processes 0 to 5 arranged as
 *     a 2-by-3 process grid.
 *  2. subgroup 2 consists of processes 6 to 9 arranged as
 *     a 2-by-2 process grid.
 *
 * On an IBM SP, the program may be run by typing
 *    poe pddrive4_ABglobal <input_file> -procs 10
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid1, grid2; double *berr; double *a, *b, *xtrue; int_t *asub, *xa; int_t i, j, m, n, nnz; int_t nprow, npcol, ldumap, p; int_t usermap[6]; int iam, info, ldb, ldx, nprocs; int nrhs = 1; /* Number of right-hand side. */ char trans[1]; char **cpp, c; FILE *fp, *fopen(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); if ( nprocs < 10 ) { fprintf(stderr, "Requires at least 10 processes\n"); exit(-1); } /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID 1. ------------------------------------------------------------*/ nprow = 2; npcol = 3; ldumap = 2; p = 0; /* Grid 1 starts from process 0. */ for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++; superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid1); /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID 2. 
------------------------------------------------------------*/ nprow = 2; npcol = 2; ldumap = 2; p = 6; /* Grid 2 starts from process 6. */ for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++; superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid2); /* Bail out if I do not belong in any of the 2 grids. */ MPI_Comm_rank( MPI_COMM_WORLD, &iam ); if ( iam >= 10 ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif if ( iam >= 0 && iam < 6 ) { /* I am in grid 1. */ iam = grid1.iam; /* Get the logical number in the new grid. */ /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("\tDimension\t%dx%d\t # nonzeros %d\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid1.nprow, (int) grid1.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid1.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid1.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid1.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid1.comm ); /* Allocate storage for compressed column representation. */ dallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid1.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid1.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid1.comm ); } /* Create compressed column matrix for A. 
*/ dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doubleMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; dGenXtrue_dist(n, nrhs, xtrue, ldx); dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid1, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid1); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid1); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); Destroy_LU(n, &grid1, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); } else { /* I am in grid 2. 
*/ iam = grid2.iam; /* Get the logical number in the new grid. */ /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("\tDimension\t%dx%d\t # nonzeros %d\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid2.nprow, (int) grid2.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid2.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid2.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid2.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid2.comm ); /* Allocate storage for compressed column representation. */ dallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid2.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid2.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid2.comm ); } /* Create compressed column matrix for A. */ dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doubleMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; dGenXtrue_dist(n, nrhs, xtrue, ldx); dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. 
------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = MMD_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid2, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid2); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid2); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); Destroy_LU(n, &grid2, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); } /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRIDS. ------------------------------------------------------------*/ superlu_gridexit(&grid1); superlu_gridexit(&grid2); out: /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } SuperLU_DIST_5.3.0/EXAMPLE/pddrive1.c0000644013363400111340000001713413233431301015617 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for PDGSSVX example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * April 5, 2015
 * 
*/ #include #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PDDRIVE1.
 *
 * This example illustrates how to use PDGSSVX to
 * solve systems with the same A but different right-hand side.
 * In this case, we factorize A only once in the first call to
 * PDGSSVX, and reuse the following data structures
 * in the subsequent call to PDGSSVX:
 *        ScalePermstruct  : DiagScale, R, C, perm_r, perm_c
 *        LUstruct         : Glu_persist, Llu
 * 
 * With MPICH,  program may be run by typing:
 *    mpiexec -n <np> pddrive1 -r <proc rows> -c <proc columns> big.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid; double *berr; double *b, *xtrue, *b1; int i, j, m, n; int nprow, npcol; int iam, info, ldb, ldx, nrhs; char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; if ( !iam ) { int v_major, v_minor, v_bugfix; superlu_dist_GetVersionNumber(&v_major, &v_minor, &v_bugfix); printf("Library version:\t%d.%d.%d\n", v_major, v_minor, v_bugfix); printf("Input matrix file:\t%s\n", *cpp); printf("Process grid:\t\t%d X %d\n", (int)grid.nprow, (int)grid.npcol); fflush(stdout); } #if ( VAMPIR>=1 ) VT_traceoff(); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. 
------------------------------------------------------------*/ dcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid); if ( !(b1 = doubleMalloc_dist(ldb * nrhs)) ) ABORT("Malloc fails for b1[]"); for (j = 0; j < nrhs; ++j) for (i = 0; i < ldb; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); printf("options.ColPerm = %d\n", options.ColPerm); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } m = A.nrow; n = A.ncol; /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pdgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) printf("\tSolve the first system:\n"); pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER SYSTEM WITH THE SAME A BUT DIFFERENT RIGHT-HAND SIDE, WE WILL USE THE EXISTING L AND U FACTORS IN LUSTRUCT OBTAINED FROM A PREVIOUS FATORIZATION. 
------------------------------------------------------------*/ options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ PStatInit(&stat); /* Initialize the statistics variables. */ pdgssvx(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) printf("\tSolve the system with a different B:\n"); pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b1, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. */ /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); ScalePermstructFree(&ScalePermstruct); Destroy_LU(n, &grid, &LUstruct); LUstructFree(&LUstruct); if ( options.SolveInitialized ) { dSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b); SUPERLU_FREE(b1); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pddrive2.c0000644013363400111340000002133213233431301015613 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for PDGSSVX example * *
 * -- Distributed SuperLU routine (version 5.1.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * April 5, 2015
 * December 31, 2016 version 5.1.3
 * 
*/ #include #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PDDRIVE2.
 *
 * This example illustrates how to use PDGSSVX to solve
 * systems repeatedly with the same sparsity pattern of matrix A.
 * In this case, the column permutation vector ScalePermstruct->perm_c is
 * computed once. The following data structures will be reused in the
 * subsequent call to PDGSSVX:
 *        ScalePermstruct : perm_c
 *        LUstruct        : etree
 *
 * With MPICH, this program may be run by typing:
 *    mpiexec -n <np> pddrive2 -r <proc rows> -c <proc columns> g20.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; NRformat_loc *Astore; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid; double *berr; double *b, *b1, *xtrue, *xtrue1; int_t *colind, *colind1, *rowptr, *rowptr1; int_t i, j, m, n, nnz_loc, m_loc; int nprow, npcol; int iam, info, ldb, ldx, nrhs; char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); /* prototypes */ extern int dcreate_matrix_perturbed (SuperMatrix *, int, double **, int *, double **, int *, FILE *, gridinfo_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %4d)\n", nprow); printf("\t-c : process columns (default %4d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. 
*/ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; if ( !iam ) { int v_major, v_minor, v_bugfix; superlu_dist_GetVersionNumber(&v_major, &v_minor, &v_bugfix); printf("Library version:\t%d.%d.%d\n", v_major, v_minor, v_bugfix); printf("Input matrix file:\t%s\n", *cpp); printf("Process grid:\t\t%d X %d\n", (int)grid.nprow, (int)grid.npcol); fflush(stdout); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT-HAND SIDE. ------------------------------------------------------------*/ dcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); m = A.nrow; n = A.ncol; Astore = (NRformat_loc *) A.Store; m_loc = Astore->m_loc; /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pdgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pdinf_norm_error(iam, m_loc, nrhs, b, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. 
*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ SUPERLU_FREE(b); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue); /* Free storage of the exact solution. */ /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM. ONLY THE SPARSITY PATTERN OF MATRIX A IS THE SAME. ------------------------------------------------------------*/ options.Fact = SamePattern; if (iam==0) { print_options_dist(&options); #if ( PRNTlevel>=2 ) PrintInt10("perm_r", m, ScalePermstruct.perm_r); PrintInt10("perm_c", n, ScalePermstruct.perm_c); #endif } /* Get the matrix from file, perturbed some diagonal entries to force a different perm_r[]. Set up the right-hand side. */ if ( !(fp = fopen(*cpp, "r")) ) ABORT("File does not exist"); dcreate_matrix_perturbed(&A, nrhs, &b1, &ldb, &xtrue1, &ldx, fp, &grid); PStatInit(&stat); /* Initialize the statistics variables. */ /* Solve the linear system. */ pdgssvx(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) printf("Solve the system with the same sparsity pattern.\n"); pdinf_norm_error(iam, m_loc, nrhs, b1, ldb, xtrue1, ldx, &grid); #if ( PRNTlevel>=2 ) if (iam==0) { PrintInt10("new perm_r", m, ScalePermstruct.perm_r); PrintInt10("new perm_c", n, ScalePermstruct.perm_c); } #endif /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. 
*/ ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); /* Deallocate the structure of L and U.*/ if ( options.SolveInitialized ) { dSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b1); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue1); /* Free storage of the exact solution. */ SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pddrive3.c0000644013363400111340000002167413233431301015625 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for PDGSSVX example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * April 5, 2015
 * 
*/ #include #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PDDRIVE3.
 *
 * This example illustrates how to use PDGSSVX to solve
 * systems repeatedly with the same sparsity pattern and similar
 * numerical values of matrix A.
 * In this case, the column permutation vector and symbolic factorization are
 * computed only once. The following data structures will be reused in the
 * subsequent call to PDGSSVX:
 *        ScalePermstruct : DiagScale, R, C, perm_r, perm_c
 *        LUstruct        : etree, Glu_persist, Llu
 *
 * NOTE:
 * The distributed nonzero structures of L and U remain the same,
 * although the numerical values are different. So 'Llu' is set up once
 * in the first call to PDGSSVX, and reused in the subsequent call.
 *
 * With MPICH, this program may be run by typing:
 *    mpiexec -n <np> pddrive3 -r <proc rows> -c <proc columns> big.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; NRformat_loc *Astore; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid; double *berr; double *b, *b1, *xtrue, *nzval, *nzval1; int_t *colind, *colind1, *rowptr, *rowptr1; int_t i, j, m, n, nnz_loc, m_loc, fst_row; int nprow, npcol; int iam, info, ldb, ldx, nrhs; char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. 
*/ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; if ( !iam ) { int v_major, v_minor, v_bugfix; superlu_dist_GetVersionNumber(&v_major, &v_minor, &v_bugfix); printf("Library version:\t%d.%d.%d\n", v_major, v_minor, v_bugfix); printf("Input matrix file:\t%s\n", *cpp); printf("Process grid:\t\t%d X %d\n", (int)grid.nprow, (int)grid.npcol); fflush(stdout); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. ------------------------------------------------------------*/ dcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid); if ( !(b1 = doubleMalloc_dist(ldb * nrhs)) ) ABORT("Malloc fails for b1[]"); for (j = 0; j < nrhs; ++j) for (i = 0; i < ldb; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); m = A.nrow; n = A.ncol; /* Save a copy of the matrix A. */ Astore = (NRformat_loc *) A.Store; nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; fst_row = Astore->fst_row; nzval = Astore->nzval; colind = Astore->colind; rowptr = Astore->rowptr; nzval1 = doubleMalloc_dist(nnz_loc); colind1 = intMalloc_dist(nnz_loc); rowptr1 = intMalloc_dist(m_loc+1); for (i = 0; i < nnz_loc; ++i) { nzval1[i] = nzval[i]; colind1[i] = colind[i]; } for (i = 0; i < m_loc+1; ++i) rowptr1[i] = rowptr[i]; /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. 
------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pdgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pdinf_norm_error(iam, m_loc, nrhs, b, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); /* Deallocate storage of matrix A. */ SUPERLU_FREE(b); /* Free storage of right-hand side. */ /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM. THE MATRIX A HAS THE SAME SPARSITY PATTERN AND THE SIMILAR NUMERICAL VALUES AS THAT IN A PREVIOUS SYSTEM. ------------------------------------------------------------*/ options.Fact = SamePattern_SameRowPerm; PStatInit(&stat); /* Initialize the statistics variables. */ /* Set up the local A in NR_loc format */ dCreate_CompRowLoc_Matrix_dist(&A, m, n, nnz_loc, m_loc, fst_row, nzval1, colind1, rowptr1, SLU_NR_loc, SLU_D, SLU_GE); /* Solve the linear system. */ pdgssvx(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. 
*/ if ( !iam ) printf("Solve a system with the same pattern and similar values.\n"); pdinf_norm_error(iam, m_loc, nrhs, b1, ldb, xtrue, ldx, &grid); /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); /* Deallocate the structure of L and U.*/ if ( options.SolveInitialized ) { dSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b1); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue); /* Free storage of the exact solution. */ SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pddrive4.c0000644013363400111340000002155113233431301015620 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief This example illustrates how to divide up the processes into subgroups * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * April 5, 2015
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PDDRIVE4.
 *
 * This example illustrates how to divide up the processes into
 * subgroups (multiple grids) such that each subgroup solves a linear
 * system independently from the other.
 *
 * In this example, there are 2 subgroups:
 *  1. subgroup 1 consists of processes 0 to 5 arranged as
 *     a 2-by-3 process grid.
 *  2. subgroup 2 consists of processes 6 to 9 arranged as
 *     a 2-by-2 process grid.
 *
 * With MPICH, this program may be run by typing:
 *    mpiexec -n 10 pddrive4 big.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid1, grid2; double *berr; double *a, *b, *xtrue; int_t *asub, *xa; int_t i, j, m, n; int nprow, npcol, ldumap, p; int_t usermap[6]; int iam, info, ldb, ldx, nprocs; int nrhs = 1; /* Number of right-hand side. */ char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); if ( nprocs < 10 ) { fprintf(stderr, "Requires at least 10 processes\n"); exit(-1); } /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID 1. ------------------------------------------------------------*/ nprow = 2; npcol = 3; ldumap = 2; p = 0; /* Grid 1 starts from process 0. */ for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++; superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid1); /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID 2. ------------------------------------------------------------*/ nprow = 2; npcol = 2; ldumap = 2; p = 6; /* Grid 2 starts from process 6. 
*/ for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++; superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid2); /* Bail out if I do not belong in any of the 2 grids. */ MPI_Comm_rank( MPI_COMM_WORLD, &iam ); if ( iam >= 10 ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif if ( iam >= 0 && iam < 6 ) { /* I am in grid 1. */ iam = grid1.iam; /* Get the logical number in the new grid. */ /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. ------------------------------------------------------------*/ dcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid1); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } m = A.nrow; n = A.ncol; /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pdgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid1, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, &grid1); /* Print the statistics. 
*/ PStatPrint(&options, &stat, &grid1); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); ScalePermstructFree(&ScalePermstruct); Destroy_LU(n, &grid1, &LUstruct); LUstructFree(&LUstruct); if ( options.SolveInitialized ) { dSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); } else { /* I am in grid 2. */ iam = grid2.iam; /* Get the logical number in the new grid. */ /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. ------------------------------------------------------------*/ dcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid2); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = MMD_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); m = A.nrow; n = A.ncol; /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pdgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid2, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, &grid2); /* Print the statistics. 
*/ PStatPrint(&options, &stat, &grid2); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); ScalePermstructFree(&ScalePermstruct); Destroy_LU(n, &grid2, &LUstruct); LUstructFree(&LUstruct); if ( options.SolveInitialized ) { dSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); } /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRIDS. ------------------------------------------------------------*/ superlu_gridexit(&grid1); superlu_gridexit(&grid2); out: /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } SuperLU_DIST_5.3.0/EXAMPLE/dreadhb.c0000644013363400111340000002636513233431301015500 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include #include #include #include "superlu_ddefs.h" /* * Prototypes */ static void ReadVector(FILE *, int_t, int_t *, int_t, int_t); static void dReadValues(FILE *, int_t, double *, int_t, int_t); extern void FormFullA(int_t, int_t *, double **, int_t **, int_t **); static int DumpLine(FILE *); static int ParseIntFormat(char *, int_t *, int_t *); static int ParseFloatFormat(char *, int_t *, int_t *); /*! \brief * *
 * Purpose
 * =======
 * 
 * Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format 
 * as described below.
 * 
 * Line 1 (A72,A8) 
 *  	Col. 1 - 72   Title (TITLE) 
 *	Col. 73 - 80  Key (KEY) 
 * 
 * Line 2 (5I14) 
 * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
 * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
 * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
 * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
 *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
 *                    (including starting guesses and solution vectors 
 *		       if present) 
 *           	      (zero indicates no right-hand side data is present) 
 *
 * Line 3 (A3, 11X, 4I14) 
 *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
 * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
 * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
 *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
 *	              (equal to number of entries for assembled matrices) 
 * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
 *	              (zero in the case of assembled matrices) 
 * Line 4 (2A16, 2A20) 
 * 	Col. 1 - 16   Format for pointers (PTRFMT) 
 *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
 *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
 * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
 *
 * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
 *    	Col. 1 	      Right-hand side type: 
 *	         	  F for full storage or M for same format as matrix 
 *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
 *    	Col. 3        X if an exact solution vector(s) is supplied. 
 *	Col. 15 - 28  Number of right-hand sides (NRHS) 
 *	Col. 29 - 42  Number of row indices (NRHSIX) 
 *          	      (ignored in case of unassembled matrices) 
 *
 * The three character type field on line 3 describes the matrix type. 
 * The following table lists the permitted values for each of the three 
 * characters. As an example of the type field, RSA denotes that the matrix 
 * is real, symmetric, and assembled. 
 *
 * First Character: 
 *	R Real matrix 
 *	C Complex matrix 
 *	P Pattern only (no numerical values supplied) 
 *
 * Second Character: 
 *	S Symmetric 
 *	U Unsymmetric 
 *	H Hermitian 
 *	Z Skew symmetric 
 *	R Rectangular 
 *
 * Third Character: 
 *	A Assembled 
 *	E Elemental matrices (unassembled) 
 * 
*/ void dreadhb_dist(int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { register int_t i, numer_lines, rhscrd = 0; int_t tmp, colnum, colsize, rownum, rowsize, valnum, valsize; char buf[100], type[4]; int_t sym; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Enter dreadhb_dist()"); #endif /* Line 1 */ fgets(buf, 100, fp); /* Line 2 */ for (i=0; i<5; i++) { fscanf(fp, "%14c", buf); buf[14] = 0; tmp = atoi(buf); /*sscanf(buf, "%d", &tmp);*/ if (i == 3) numer_lines = tmp; if (i == 4 && tmp) rhscrd = tmp; } DumpLine(fp); /* Line 3 */ fscanf(fp, "%3c", type); fscanf(fp, "%11c", buf); /* pad */ type[3] = 0; #if ( DEBUGlevel>=1 ) if ( !iam ) printf("Matrix type %s\n", type); #endif fscanf(fp, "%14c", buf); *nrow = atoi(buf); fscanf(fp, "%14c", buf); *ncol = atoi(buf); fscanf(fp, "%14c", buf); *nonz = atoi(buf); fscanf(fp, "%14c", buf); tmp = atoi(buf); if (tmp != 0) if ( !iam ) printf("This is not an assembled matrix!\n"); if (*nrow != *ncol) if ( !iam ) printf("Matrix is not square.\n"); DumpLine(fp); /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */ dallocateA_dist(*ncol, *nonz, nzval, rowind, colptr); /* Line 4: format statement */ fscanf(fp, "%16c", buf); ParseIntFormat(buf, &colnum, &colsize); fscanf(fp, "%16c", buf); ParseIntFormat(buf, &rownum, &rowsize); fscanf(fp, "%20c", buf); ParseFloatFormat(buf, &valnum, &valsize); fscanf(fp, "%20c", buf); DumpLine(fp); /* Line 5: right-hand side */ if ( rhscrd ) DumpLine(fp); /* skip RHSFMT */ #if ( DEBUGlevel>=1 ) if ( !iam ) { printf("%d rows, %d nonzeros\n", *nrow, *nonz); printf("colnum %d, colsize %d\n", colnum, colsize); printf("rownum %d, rowsize %d\n", rownum, rowsize); printf("valnum %d, valsize %d\n", valnum, valsize); } #endif ReadVector(fp, *ncol+1, *colptr, colnum, colsize); #if ( DEBUGlevel>=1 ) if ( !iam ) printf("read colptr[%d] = %d\n", *ncol, (*colptr)[*ncol]); #endif ReadVector(fp, *nonz, *rowind, rownum, rowsize); #if ( DEBUGlevel>=1 ) 
if ( !iam ) printf("read rowind[%d] = %d\n", *nonz-1, (*rowind)[*nonz-1]); #endif if ( numer_lines ) { dReadValues(fp, *nonz, *nzval, valnum, valsize); #if ( DEBUGlevel>=1 ) if ( !iam ) printf("read nzval[%d] = %e\n", *nonz-1, (*nzval)[*nonz-1]); #endif } sym = (type[1] == 'S' || type[1] == 's'); if ( sym ) { FormFullA(*ncol, nonz, nzval, rowind, colptr); } fclose(fp); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Exit dreadhb_dist()"); #endif } /* Eat up the rest of the current line */ static int DumpLine(FILE *fp) { register int c; while ((c = fgetc(fp)) != '\n') ; return 0; } static int ParseIntFormat(char *buf, int_t *num, int_t *size) { char *tmp; tmp = buf; while (*tmp++ != '(') ; *num = atoi(tmp); while (*tmp != 'I' && *tmp != 'i') ++tmp; ++tmp; *size = atoi(tmp); return 0; } static int ParseFloatFormat(char *buf, int_t *num, int_t *size) { char *tmp, *period; tmp = buf; while (*tmp++ != '(') ; *num = atoi(tmp); while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd' && *tmp != 'F' && *tmp != 'f') { /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the num picked up refers to P, which should be skipped. */ if (*tmp=='p' || *tmp=='P') { ++tmp; *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ } else { ++tmp; } } ++tmp; period = tmp; while (*period != '.' && *period != ')') ++period ; *period = '\0'; *size = atoi(tmp); return 0; } static void ReadVector(FILE *fp, int_t n, int_t *where, int_t perline, int_t persize) { register int_t i, j, item; char tmp, buf[100]; i = 0; while (i < n) { fgets(buf, 100, fp); /* read a line at a time */ for (j=0; j * On input, nonz/nzval/rowind/colptr represents lower part of a symmetric * matrix. On exit, it represents the full matrix with lower and upper parts. *
*/ extern void FormFullA(int_t n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) { register int_t i, j, k, col, new_nnz; int_t *t_rowind, *t_colptr, *al_rowind, *al_colptr, *a_rowind, *a_colptr; int_t *marker; double *t_val, *al_val, *a_val; al_rowind = *rowind; al_colptr = *colptr; al_val = *nzval; if ( !(marker =(int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for marker[]"); if ( !(t_colptr = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC t_colptr[]"); if ( !(t_rowind = (int_t *) SUPERLU_MALLOC( *nonz * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for t_rowind[]"); if ( !(t_val = (double*) SUPERLU_MALLOC( *nonz * sizeof(double)) ) ) ABORT("SUPERLU_MALLOC fails for t_val[]"); /* Get counts of each column of T, and set up column pointers */ for (i = 0; i < n; ++i) marker[i] = 0; for (j = 0; j < n; ++j) { for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) ++marker[al_rowind[i]]; } t_colptr[0] = 0; for (i = 0; i < n; ++i) { t_colptr[i+1] = t_colptr[i] + marker[i]; marker[i] = t_colptr[i]; } /* Transpose matrix A to T */ for (j = 0; j < n; ++j) for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) { col = al_rowind[i]; t_rowind[marker[col]] = j; t_val[marker[col]] = al_val[i]; ++marker[col]; } new_nnz = *nonz * 2 - n; if ( !(a_colptr = (int_t *) SUPERLU_MALLOC( (n+1) * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC a_colptr[]"); if ( !(a_rowind = (int_t *) SUPERLU_MALLOC( new_nnz * sizeof(int_t)) ) ) ABORT("SUPERLU_MALLOC fails for a_rowind[]"); if ( !(a_val = (double*) SUPERLU_MALLOC( new_nnz * sizeof(double)) ) ) ABORT("SUPERLU_MALLOC fails for a_val[]"); a_colptr[0] = 0; k = 0; for (j = 0; j < n; ++j) { for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) { if ( t_rowind[i] != j ) { /* not diagonal */ a_rowind[k] = t_rowind[i]; a_val[k] = t_val[i]; #ifdef DEBUG if ( fabs(a_val[k]) < 4.047e-300 ) printf("%5d: %e\n", k, a_val[k]); #endif ++k; } } for (i = al_colptr[j]; i < al_colptr[j+1]; ++i) { a_rowind[k] = 
al_rowind[i]; a_val[k] = al_val[i]; #ifdef DEBUG if ( fabs(a_val[k]) < 4.047e-300 ) printf("%5d: %e\n", k, a_val[k]); #endif ++k; } a_colptr[j+1] = k; } printf("FormFullA: new_nnz = %d, k = %d\n", new_nnz, k); SUPERLU_FREE(al_val); SUPERLU_FREE(al_rowind); SUPERLU_FREE(al_colptr); SUPERLU_FREE(marker); SUPERLU_FREE(t_val); SUPERLU_FREE(t_rowind); SUPERLU_FREE(t_colptr); *nzval = a_val; *rowind = a_rowind; *colptr = a_colptr; *nonz = new_nnz; } SuperLU_DIST_5.3.0/EXAMPLE/CMakeLists.txt0000644013363400111340000001051213233431301016466 0ustar xiaoyessginclude_directories(${SuperLU_DIST_SOURCE_DIR}/SRC) # Libs linked to all of the examples set(all_link_libs superlu_dist ${BLAS_LIB}) if (NOT MSVC) list(APPEND all_link_libs m) endif () function(add_superlu_dist_example target input nprow npcol) set(EXAMPLE_INPUT "${SuperLU_DIST_SOURCE_DIR}/EXAMPLE/${input}") set(EXAMPLE_OUTPUT "${SuperLU_DIST_BINARY_DIR}/EXAMPLE/${target}.out") ## get_target_property(TEST_LOC ${target} LOCATION) set(EXAMPLE_LOC ${CMAKE_CURRENT_BINARY_DIR}) MATH( EXPR procs "${nprow}*${npcol}" ) # message("MPIEXEC_FLAG is ${MPIEXEC_NUMPROC_FLAG}") # corresponding to mpiexec -n 4 pddrive -r -c g20.rua add_test(${target} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${procs} ${MPIEXEC_PREFLAGS} ${target} ${MPIEXEC_POSTFLAGS} -r "${nprow}" -c "${npcol}" "${EXAMPLE_INPUT}") # add_test(NAME ${target} COMMAND "${CMAKE_COMMAND}" # -DTEST=${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${procs} # ${MPIEXEC_PREFLAGS} ${target} ${MPIEXEC_POSTFLAGS} -r "${nprow}" -c "${npcol}" "${TEST_INPUT}" # -DOUTPUT=${target}.out # -P "${SuperLU_DIST_SOURCE_DIR}/EXAMPLE/runexample.cmake" ) # MPI variables: # ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} PROCS # ${MPIEXEC_PREFLAGS} EXECUTABLE ${MPIEXEC_POSTFLAGS} ARGS) endfunction(add_superlu_dist_example) if(enable_double) set(DEXM pddrive.c dcreate_matrix.c) add_executable(pddrive ${DEXM}) target_link_libraries(pddrive ${all_link_libs}) set(DEXM1 pddrive1.c dcreate_matrix.c) add_executable(pddrive1 ${DEXM1}) 
target_link_libraries(pddrive1 ${all_link_libs}) add_superlu_dist_example(pddrive1 big.rua 2 2) set(DEXM2 pddrive2.c dcreate_matrix.c dcreate_matrix_perturbed.c) add_executable(pddrive2 ${DEXM2}) target_link_libraries(pddrive2 ${all_link_libs}) add_superlu_dist_example(pddrive2 big.rua 2 2) set(DEXM3 pddrive3.c dcreate_matrix.c) add_executable(pddrive3 ${DEXM3}) target_link_libraries(pddrive3 ${all_link_libs}) add_superlu_dist_example(pddrive3 big.rua 2 2) set(DEXM4 pddrive4.c dcreate_matrix.c) add_executable(pddrive4 ${DEXM4}) target_link_libraries(pddrive4 ${all_link_libs}) set(DEXMG pddrive_ABglobal.c) add_executable(pddrive_ABglobal ${DEXMG}) target_link_libraries(pddrive_ABglobal ${all_link_libs}) set(DEXMG1 pddrive1_ABglobal.c) add_executable(pddrive1_ABglobal ${DEXMG1}) target_link_libraries(pddrive1_ABglobal ${all_link_libs}) set(DEXMG2 pddrive2_ABglobal.c) add_executable(pddrive2_ABglobal ${DEXMG2}) target_link_libraries(pddrive2_ABglobal ${all_link_libs}) set(DEXMG3 pddrive3_ABglobal.c) add_executable(pddrive3_ABglobal ${DEXMG3}) target_link_libraries(pddrive3_ABglobal ${all_link_libs}) set(DEXMG4 pddrive4_ABglobal.c) add_executable(pddrive4_ABglobal ${DEXMG4}) target_link_libraries(pddrive4_ABglobal ${all_link_libs}) endif() if(enable_complex16) set(ZEXM pzdrive.c zcreate_matrix.c) add_executable(pzdrive ${ZEXM}) target_link_libraries(pzdrive ${all_link_libs}) set(ZEXM1 pzdrive1.c zcreate_matrix.c) add_executable(pzdrive1 ${ZEXM1}) target_link_libraries(pzdrive1 ${all_link_libs}) add_superlu_dist_example(pzdrive1 cg20.cua 2 2) set(ZEXM2 pzdrive2.c zcreate_matrix.c zcreate_matrix_perturbed.c) add_executable(pzdrive2 ${ZEXM2}) target_link_libraries(pzdrive2 ${all_link_libs}) add_superlu_dist_example(pzdrive2 cg20.cua 2 2) set(ZEXM3 pzdrive3.c zcreate_matrix.c) add_executable(pzdrive3 ${ZEXM3}) target_link_libraries(pzdrive3 ${all_link_libs}) add_superlu_dist_example(pzdrive3 cg20.cua 2 2) set(ZEXM4 pzdrive4.c zcreate_matrix.c) add_executable(pzdrive4 
${ZEXM4}) target_link_libraries(pzdrive4 ${all_link_libs}) set(ZEXMG pzdrive_ABglobal.c) add_executable(pzdrive_ABglobal ${ZEXMG}) target_link_libraries(pzdrive_ABglobal ${all_link_libs}) set(ZEXMG1 pzdrive1_ABglobal.c) add_executable(pzdrive1_ABglobal ${ZEXMG1}) target_link_libraries(pzdrive1_ABglobal ${all_link_libs}) set(ZEXMG2 pzdrive2_ABglobal.c) add_executable(pzdrive2_ABglobal ${ZEXMG2}) target_link_libraries(pzdrive2_ABglobal ${all_link_libs}) set(ZEXMG3 pzdrive3_ABglobal.c) add_executable(pzdrive3_ABglobal ${ZEXMG3}) target_link_libraries(pzdrive3_ABglobal ${all_link_libs}) set(ZEXMG4 pzdrive4_ABglobal.c) add_executable(pzdrive4_ABglobal ${ZEXMG4}) target_link_libraries(pzdrive4_ABglobal ${all_link_libs}) endif() SuperLU_DIST_5.3.0/EXAMPLE/pzdrive1_ABglobal.c0000644013363400111340000002161213233431301017364 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for pzgssvx_ABglobal example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * April 5, 2015
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pzdrive1_ABglobal.
 *
 * This example illustrates how to use pzgssvx_ABglobal to
 * solve systems with the same A but different right-hand side.
 * In this case, we factorize A only once in the first call to
 * pzgssvx_ABglobal, and reuse the following data structures
 * in the subsequent call to pzgssvx_ABglobal:
 *        ScalePermstruct  : DiagScale, R, C, perm_r, perm_c
 *        LUstruct         : Glu_persist, Llu
 * 
 * On an IBM SP, the program may be run by typing:
 *    poe pzdrive1_ABglobal -r <proc rows> -c <proc columns> <input_matrix> -procs <number>

*

*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; doublecomplex *a, *b, *b1, *xtrue; int_t *asub, *xa; int_t i, j, m, n, nnz; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp, *fopen(); extern int cpp_defs(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default " IFMT ")\n", nprow); printf("\t-c : process columns (default " IFMT ")\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. 
*/ cpp_defs(); /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("Input matrix file: %s\n", *cpp); printf("\tDimension\t" IFMT "x" IFMT "\t # nonzeros " IFMT "\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); /* Allocate storage for compressed column representation. */ zallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } /* Create compressed column matrix for A. */ zCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if ( !(b = doublecomplexMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b[]"); if ( !(b1 = doublecomplexMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]"); if ( !(xtrue = doublecomplexMalloc_dist(n*nrhs)) ) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; zGenXtrue_dist(n, nrhs, xtrue, ldx); zFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. 
------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { zinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid); } PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER SYSTEM WITH THE SAME A BUT DIFFERENT RIGHT-HAND SIDE, WE WILL USE THE EXISTING L AND U FACTORS IN LUSTRUCT OBTAINED FROM A PREVIOUS FATORIZATION. ------------------------------------------------------------*/ options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ PStatInit(&stat); /* Initialize the statistics variables. */ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { printf("Solve the system with a different B.\n"); zinf_norm_error_dist(n, nrhs, b1, ldb, xtrue, ldx, &grid); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. 
------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); Destroy_LU(n, &grid, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); SUPERLU_FREE(b); SUPERLU_FREE(b1); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pzdrive2_ABglobal.c0000644013363400111340000002327613233431301017375 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for pzgssvx_ABglobal example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * April 5, 2015
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pzdrive2_ABglobal.
 *
 * This example illustrates how to use pzgssvx_ABglobal to solve
 * systems repeatedly with the same sparsity pattern of matrix A.
 * In this case, the column permutation vector ScalePermstruct->perm_c is
 * computed once.  The following data structures will be reused in the
 * subsequent call to pzgssvx_ABglobal:
 *        ScalePermstruct : perm_c
 *        LUstruct        : etree
 *
 * On an IBM SP, the program may be run by typing:
 *    poe pzdrive2_ABglobal -r <proc rows> -c <proc columns> <input_matrix> -procs <number>

*

*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; doublecomplex *a, *a1, *b, *b1, *xtrue; int_t *asub, *asub1, *xa, *xa1; int_t i, j, m, n, nnz; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp, *fopen(); extern int cpp_defs(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ Process 0 reads the matrix A, and then broadcasts it to all the other processes. 
------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. */ cpp_defs(); /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("Input matrix file: %s\n", *cpp); printf("\tDimension\t%dx%d\t # nonzeros %d\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); /* Allocate storage for compressed column representation. */ zallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } /* Create compressed column matrix for A. */ zCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doublecomplexMalloc_dist(m * nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doublecomplexMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; zGenXtrue_dist(n, nrhs, xtrue, ldx); zFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); /* Save a copy of the right-hand side. */ if ( !(b1 = doublecomplexMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]"); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* Save a copy of the matrix A. 
*/ zallocateA_dist(n, nnz, &a1, &asub1, &xa1); for (i = 0; i < nnz; ++i) { a1[i] = a[i]; asub1[i] = asub[i]; } for (i = 0; i < n+1; ++i) xa1[i] = xa[i]; /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { zinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid); } PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ SUPERLU_FREE(b); /* Free storage of right-hand side. */ /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM. ONLY THE SPARSITY PATTERN OF MATRIX A IS THE SAME. ------------------------------------------------------------*/ options.Fact = SamePattern; PStatInit(&stat); /* Initialize the statistics variables. */ /* Create compressed column matrix for A. */ zCreate_CompCol_Matrix_dist(&A, m, n, nnz, a1, asub1, xa1, SLU_NC, SLU_Z, SLU_GE); /* Solve the linear system. 
*/ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { printf("Solve the system with the same sparsity pattern.\n"); zinf_norm_error_dist(n, nrhs, b1, ldb, xtrue, ldx, &grid); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); /* Deallocate the structure of L and U.*/ SUPERLU_FREE(b1); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue); /* Free storage of the exact solution. */ SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/g4.rua0000644013363400111340000000275313233431301014761 0ustar xiaoyessgg4, symm. 
permuted by SYMMMD sym 17 1 3 13 0 RUA 16 16 64 0 (17I3) (26I3) (5E15.8) (5E15.8) 1 4 8 11 15 19 22 27 31 35 38 43 47 51 55 60 65 1 13 14 2 6 14 15 3 5 12 4 5 6 15 3 4 5 16 2 4 6 7 11 13 14 15 8 10 11 12 9 10 11 13 8 9 10 7 8 9 11 16 3 8 12 16 1 7 9 13 1 2 7 14 2 4 7 15 16 5 11 12 15 16 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00SuperLU_DIST_5.3.0/EXAMPLE/cg20.cua0000644013363400111340000021141713233431301015162 0ustar xiaoyessgcomplex g20, symm. 
permuted by SYMMMD sym 914 26 120 768 0 CUA 400 400 1920 0 (16I5) (16I5) (5E15.8) (5E15.8) 1 6 11 16 21 26 31 36 41 46 51 56 61 66 71 76 81 86 91 96 101 106 111 116 121 126 131 136 141 146 151 156 161 166 171 176 181 186 191 196 201 206 210 214 218 223 228 231 235 239 243 248 253 257 262 266 271 276 281 286 291 295 300 304 309 313 318 323 328 332 337 342 347 352 357 362 367 372 377 382 387 392 397 402 407 412 417 422 427 432 437 442 447 452 457 462 467 472 477 482 487 492 497 501 505 510 514 519 524 529 534 539 544 549 554 559 564 568 572 576 580 583 588 593 597 601 606 610 614 619 624 629 634 639 644 649 654 659 664 669 674 678 682 686 691 696 701 706 710 714 718 723 728 732 737 742 747 752 757 762 767 772 777 781 786 791 796 800 805 810 815 820 825 830 835 839 844 849 854 859 864 869 874 879 884 889 894 899 904 909 914 919 923 928 932 937 941 946 951 956 961 966 971 976 981 986 991 996 1001 1006 1011 1015 1020 1024 1029 1033 1038 1043 1048 1053 1058 1063 1068 1073 1078 1083 1088 1093 1098 1103 1108 1113 1117 1122 1126 1130 1133 1138 1142 1147 1152 1156 1161 1165 1170 1174 1179 1183 1188 1193 1198 1203 1208 1212 1217 1221 1226 1230 1235 1240 1245 1249 1254 1259 1264 1269 1274 1279 1284 1288 1293 1298 1303 1308 1313 1317 1322 1327 1332 1337 1342 1347 1352 1357 1362 1367 1372 1377 1382 1387 1392 1397 1402 1407 1412 1417 1422 1427 1432 1437 1442 1446 1451 1455 1459 1464 1468 1472 1477 1482 1487 1492 1497 1501 1506 1510 1515 1519 1522 1526 1530 1535 1539 1544 1549 1554 1559 1563 1568 1573 1578 1583 1587 1592 1596 1601 1605 1610 1615 1620 1625 1630 1635 1640 1645 1650 1655 1660 1665 1670 1674 1679 1684 1689 1694 1699 1704 1709 1714 1719 1724 1729 1734 1738 1743 1748 1753 1758 1763 1768 1773 1778 1783 1788 1792 1797 1802 1807 1811 1816 1821 1826 1831 1836 1841 1846 1851 1856 1861 1866 1871 1876 1881 1886 1891 1896 1901 1906 1911 1916 1921 1 9 32 391 395 2 9 392 395 400 3 8 389 393 394 4 8 9 392 394 5 7 8 381 389 6 7 8 9 32 5 6 7 33 382 3 4 5 6 8 1 2 4 6 9 10 31 396 398 
399 11 18 29 30 31 12 18 31 397 399 13 17 386 387 388 14 17 18 30 387 15 17 383 388 390 16 17 18 390 397 13 14 15 16 17 11 12 14 16 18 19 28 33 380 382 20 22 28 29 33 21 22 29 31 398 20 21 22 32 391 23 27 379 384 385 24 27 28 379 380 25 27 30 385 387 26 27 28 29 30 23 24 25 26 27 19 20 24 26 28 11 20 21 26 29 11 14 25 26 30 10 11 12 21 31 1 6 22 32 33 7 19 20 32 33 34 74 92 370 372 35 40 70 174 176 36 40 91 174 177 37 39 40 70 71 38 39 40 90 91 37 38 39 74 92 35 36 37 38 40 41 56 70 71 73 42 44 55 56 43 44 175 176 42 43 44 45 44 45 56 70 176 46 54 55 56 73 47 49 53 48 49 54 55 47 48 49 52 50 51 53 69 50 51 52 68 72 49 51 52 53 54 47 50 52 53 46 48 52 54 72 42 46 48 55 41 42 45 46 56 57 59 67 68 72 58 59 67 74 372 57 58 59 71 73 60 66 67 372 373 61 62 65 69 61 62 66 67 68 63 64 65 374 63 64 66 371 373 61 63 65 66 60 62 64 65 66 57 58 60 62 67 51 57 62 68 69 50 61 68 69 35 37 41 45 70 37 41 59 71 74 51 54 57 72 73 41 46 59 72 73 34 39 58 71 74 75 89 369 381 389 76 78 92 370 375 77 78 89 369 375 76 77 78 88 90 79 87 91 177 178 80 87 88 90 91 81 86 87 173 178 82 86 173 379 380 83 85 86 87 88 84 85 86 380 382 83 84 85 89 381 81 82 83 84 86 79 80 81 83 87 78 80 83 88 89 75 77 85 88 89 38 78 80 90 92 36 38 79 80 91 34 39 76 90 92 93 172 385 386 387 94 98 168 169 170 95 98 169 172 386 96 98 109 170 171 97 98 109 386 388 94 95 96 97 98 99 108 377 383 390 100 108 376 377 378 101 107 109 167 171 102 107 109 383 388 103 106 107 167 104 106 108 378 105 106 107 108 383 103 104 105 106 101 102 103 105 107 99 100 104 105 108 96 97 101 102 109 110 166 173 379 384 111 132 138 162 164 112 132 138 168 169 113 131 132 163 164 114 130 131 132 168 115 129 130 168 170 116 123 128 129 130 117 121 123 128 118 120 131 163 119 120 121 123 118 119 120 122 117 119 121 120 122 123 130 131 116 117 119 122 123 124 127 128 129 125 127 167 171 126 127 129 170 171 124 125 126 127 116 117 124 128 115 116 124 126 129 114 115 116 122 130 113 114 118 122 131 111 112 113 114 132 133 137 138 162 165 134 
137 165 166 384 135 137 138 169 172 136 137 172 384 385 133 134 135 136 137 111 112 133 135 138 139 145 174 175 176 140 145 161 174 177 141 143 145 175 142 143 153 160 141 142 143 144 143 144 145 160 161 139 140 141 144 145 146 152 153 159 160 147 152 159 162 165 148 150 152 153 149 150 163 164 148 149 150 151 150 151 152 162 164 146 147 148 151 152 142 146 148 153 154 158 159 160 161 155 158 161 177 178 156 158 159 165 166 157 158 166 173 178 154 155 156 157 158 146 147 154 156 159 142 144 146 154 160 140 144 154 155 161 111 133 147 151 162 113 118 149 163 111 113 149 151 164 133 134 147 156 165 110 134 156 157 166 101 103 125 167 94 112 114 115 168 94 95 112 135 169 94 96 115 126 170 96 101 125 126 171 93 95 135 136 172 81 82 110 157 173 35 36 139 140 174 43 139 141 175 35 43 45 139 176 36 79 140 155 177 79 81 155 157 178 179 183 201 369 375 180 183 200 201 274 181 183 369 389 393 182 183 272 274 393 179 180 181 182 183 184 188 370 372 373 185 188 199 371 373 186 188 201 370 375 187 188 199 200 201 184 185 186 187 188 189 198 200 274 277 190 198 273 276 277 191 197 198 199 200 192 193 196 374 192 193 197 199 371 194 195 196 275 194 195 197 198 276 192 194 196 197 191 193 195 196 197 189 190 191 195 198 185 187 191 193 199 180 187 189 191 200 179 180 186 187 201 202 271 272 393 394 203 207 272 274 277 204 207 218 273 277 205 207 267 271 272 206 207 218 266 267 203 204 205 206 207 208 217 218 266 268 209 217 265 268 270 210 216 217 218 273 211 212 215 275 211 212 216 273 276 213 214 215 269 213 214 216 217 270 211 213 215 216 210 212 214 215 216 208 209 210 214 217 204 206 208 210 218 219 223 271 392 394 220 223 229 267 271 221 223 365 392 400 222 223 229 365 366 219 220 221 222 223 224 228 229 266 267 225 228 263 266 268 226 228 229 264 366 227 228 262 263 264 224 225 226 227 228 220 222 224 226 229 230 249 263 265 268 231 239 248 249 265 232 233 238 269 232 233 239 265 270 234 236 237 238 235 236 237 247 234 235 236 234 235 237 239 248 232 234 238 239 231 233 237 
238 239 240 246 249 262 263 241 242 245 247 241 242 246 248 249 243 244 245 261 243 244 246 260 262 241 243 245 246 240 242 244 245 246 235 241 247 248 231 237 242 247 248 230 231 240 242 249 250 259 264 364 366 251 259 260 262 264 252 258 259 364 367 253 254 257 261 253 254 258 259 260 255 256 257 363 255 256 258 367 368 253 255 257 258 252 254 256 257 258 250 251 252 254 259 244 251 254 260 261 243 253 260 261 227 240 244 251 262 225 227 230 240 263 226 227 250 251 264 209 230 231 233 265 206 208 224 225 266 205 206 220 224 267 208 209 225 230 268 213 232 269 270 209 214 233 269 270 202 205 219 220 271 182 202 203 205 272 190 204 210 212 273 180 182 189 203 274 194 211 275 276 190 195 212 275 276 189 190 203 204 277 278 283 289 395 400 279 283 289 361 362 280 282 283 391 395 281 282 283 359 361 280 281 282 396 398 278 279 280 281 283 284 288 289 365 400 285 288 364 365 366 286 288 289 360 362 287 288 360 364 367 284 285 286 287 288 278 279 284 286 289 290 292 298 396 399 291 292 298 355 357 290 291 292 358 359 293 297 377 390 397 294 297 298 397 399 295 297 356 376 377 296 297 298 355 356 293 294 295 296 297 290 291 294 296 298 299 328 351 355 357 300 308 328 355 356 301 307 326 327 302 307 308 326 328 303 306 376 378 304 306 307 308 305 306 308 356 376 303 304 305 306 301 302 304 307 300 302 304 305 308 309 311 326 328 351 310 311 324 326 327 309 310 311 325 353 312 323 325 352 353 313 314 322 350 313 314 323 352 354 315 316 320 322 315 316 321 323 325 317 319 324 327 318 319 320 317 318 319 321 315 318 320 321 316 319 320 321 324 313 315 322 323 312 314 316 322 323 310 317 321 324 325 311 312 316 324 325 301 302 309 310 326 301 310 317 327 299 300 302 309 328 329 338 349 360 362 330 338 360 367 368 331 337 338 348 349 332 333 336 363 332 333 337 338 368 334 335 336 350 334 335 337 348 354 332 334 336 337 331 333 335 336 337 329 330 331 333 338 339 347 358 359 361 340 347 349 361 362 341 346 347 348 349 342 346 348 352 354 343 345 346 347 358 344 345 346 352 353 
343 344 345 351 357 341 342 343 344 346 339 340 341 343 347 331 335 341 342 348 329 331 340 341 349 313 334 350 354 299 309 345 351 353 312 314 342 344 352 311 312 344 351 353 314 335 342 350 354 291 296 299 300 355 295 296 300 305 356 291 299 345 357 358 292 339 343 357 358 281 292 339 359 396 286 287 329 330 360 279 281 339 340 361 279 286 329 340 362 255 332 363 368 250 252 285 287 364 221 222 284 285 365 222 226 250 285 366 252 256 287 330 367 256 330 333 363 368 75 77 179 181 369 34 76 184 186 370 64 185 193 371 374 34 58 60 184 372 60 64 184 185 373 63 192 371 374 76 77 179 186 375 100 295 303 305 376 99 100 293 295 377 100 104 303 378 23 24 82 110 379 19 24 82 84 380 5 75 85 381 382 7 19 84 381 382 15 99 102 105 383 23 110 134 136 384 23 25 93 136 385 13 93 95 97 386 13 14 25 93 387 13 15 97 102 388 3 5 75 181 389 15 16 99 293 390 1 22 280 391 398 2 4 219 221 392 3 181 182 202 393 3 4 202 219 394 1 2 278 280 395 10 282 290 359 396 12 16 293 294 397 10 21 282 391 398 10 12 290 294 399 2 221 278 284 400 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 
1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 
1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 
1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 
1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 -1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00-1.00000000E+00 1.00000000E+00 4.00000000E+00 1.00000000E+00 SuperLU_DIST_5.3.0/EXAMPLE/pzdrive_ABglobal.c0000644013363400111340000001743713233431301017315 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for pzgssvx_ABglobal example * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pzdrive_ABglobal.
 *
 * This example illustrates how to use pzgssvx_ABglobal with the full
 * (default) options to solve a linear system.
 * 
 * Five basic steps are required:
 *   1. Initialize the MPI environment and the SuperLU process grid
 *   2. Set up the input matrix and the right-hand side
 *   3. Set the options argument
 *   4. Call pzgssvx_ABglobal
 *   5. Release the process grid and terminate the MPI environment
 *
 * On an IBM SP, the program may be run by typing
 *    poe pzdrive_ABglobal -r <proc rows> -c <proc columns> <input_file> -procs <total procs>

*

*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; doublecomplex *a, *b, *xtrue; int_t *asub, *xa; int_t m, n, nnz; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp, *fopen(); extern int cpp_defs(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default " IFMT ")\n", nprow); printf("\t-c : process columns (default " IFMT ")\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. 
*/ cpp_defs(); /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("Input matrix file: %s\n", *cpp); printf("\tDimension\t" IFMT "x" IFMT "\t # nonzeros " IFMT "\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); /* Allocate storage for compressed column representation. */ zallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } /* Create compressed column matrix for A. */ zCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doublecomplexMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doublecomplexMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; zGenXtrue_dist(n, nrhs, xtrue, ldx); zFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. 
------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { zinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid); } PStatPrint(&options, &stat, &grid); /* Print the statistics. */ /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); Destroy_LU(n, &grid, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. 
CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/zreadtriple.c0000644013363400111340000001020413233431301016415 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * */ #include #include "superlu_zdefs.h" #undef EXPAND_SYM /*! brief * *
 * Output parameters
 * =================
 *   (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of
 *      nonzeros in columns of matrix A; (*nzval)[*] the numerical values;
 *	column i of A is given by (*nzval)[k], k = (*colptr)[i],...,
 *      (*colptr)[i+1]-1.
 * 
*/ void zreadtriple(FILE *fp, int_t *m, int_t *n, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { int_t i, j, k, jsize, lasta, nnz, nz, new_nonz; doublecomplex *a, *val; int_t *asub, *xa, *row, *col; int_t zero_base = 0; /* File format: * First line: #rows #non-zero * Triplet in the rest of lines: * row col value */ /*fscanf(fp, "%d%d%d", m, n, nonz);*/ #ifdef _LONGINT fscanf(fp, "%ld%ld", n, nonz); #else fscanf(fp, "%d%d", n, nonz); #endif #ifdef EXPAND_SYM new_nonz = 2 * *nonz - *n; #else new_nonz = *nonz; #endif *m = *n; printf("m %ld, n %ld, nonz %ld\n", *m, *n, *nonz); zallocateA_dist(*n, new_nonz, nzval, rowind, colptr); /* Allocate storage */ a = *nzval; asub = *rowind; xa = *colptr; if ( !(val = (doublecomplex *) SUPERLU_MALLOC(new_nonz * sizeof(doublecomplex))) ) ABORT("Malloc fails for val[]"); if ( !(row = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for row[]"); if ( !(col = (int_t *) SUPERLU_MALLOC(new_nonz * sizeof(int_t))) ) ABORT("Malloc fails for col[]"); for (j = 0; j < *n; ++j) xa[j] = 0; /* Read into the triplet array from a file */ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) { #ifdef _LONGINT fscanf(fp, "%ld%ld%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); #else fscanf(fp, "%d%d%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); #endif if ( nnz == 0 ) /* first nonzero */ if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else printf("triplet file: row/col indices are one-based.\n"); if ( !zero_base ) { /* Change to 0-based indexing. 
*/ --row[nz]; --col[nz]; } if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n /*|| val[nz] == 0.*/) { fprintf(stderr, "nz %d, (%d, %d) = %e out of bound, removed\n", nz, row[nz], col[nz], val[nz]); exit(-1); } else { ++xa[col[nz]]; #ifdef EXPAND_SYM if ( row[nz] != col[nz] ) { /* Excluding diagonal */ ++nz; row[nz] = col[nz-1]; col[nz] = row[nz-1]; val[nz] = val[nz-1]; ++xa[col[nz]]; } #endif ++nz; } } *nonz = nz; #ifdef EXPAND_SYM printf("new_nonz after symmetric expansion:\t%d\n", *nonz); #endif /* Initialize the array of column pointers */ k = 0; jsize = xa[0]; xa[0] = 0; for (j = 1; j < *n; ++j) { k += jsize; jsize = xa[j]; xa[j] = k; } /* Copy the triplets into the column oriented storage */ for (nz = 0; nz < *nonz; ++nz) { j = col[nz]; k = xa[j]; asub[k] = row[nz]; a[k] = val[nz]; ++xa[j]; } /* Reset the column pointers to the beginning of each column */ for (j = *n; j > 0; --j) xa[j] = xa[j-1]; xa[0] = 0; SUPERLU_FREE(val); SUPERLU_FREE(row); SUPERLU_FREE(col); #ifdef CHK_INPUT for (i = 0; i < *n; i++) { printf("Col %d, xa %d\n", i, xa[i]); for (k = xa[i]; k < xa[i+1]; k++) printf("%d\t%16.10f\n", asub[k], a[k]); } #endif } void zreadrhs(int m, doublecomplex *b) { FILE *fp, *fopen(); int i, j; if ( !(fp = fopen("b.dat", "r")) ) { fprintf(stderr, "zreadrhs: file does not exist\n"); exit(-1); } for (i = 0; i < m; ++i) fscanf(fp, "%lf%lf\n", &(b[i].r), &(b[i].i)); fclose(fp); } SuperLU_DIST_5.3.0/EXAMPLE/zcreate_matrix.c0000644013363400111340000001566613233431301017132 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read the matrix from data file * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_zdefs.h" /* \brief * *
 * Purpose
 * =======
 * 
 * ZCREATE_MATRIX reads the matrix from a data file in Harwell-Boeing format,
 * and distributes it to the processes in a distributed compressed row format.
 * It also generates the distributed true solution X and the right-hand
 * side RHS.
 *
 *
 * Arguments   
 * =========      
 *
 * A     (output) SuperMatrix*
 *       Local matrix A in NR_loc format. 
 *
 * NRHS  (input) int
 *       Number of right-hand sides.
 *
 * RHS   (output) doublecomplex**
 *       The right-hand side matrix.
 *
 * LDB   (output) int*
 *       Leading dimension of the right-hand side matrix.
 *
 * X     (output) doublecomplex**
 *       The true solution matrix.
 *
 * LDX   (output) int*
 *       The leading dimension of the true solution matrix.
 *
 * FP    (input) FILE*
 *       The matrix file pointer.
 *
 * GRID  (input) gridinfo_t*
 *       The 2D process mesh.
 * 
*/ int zcreate_matrix(SuperMatrix *A, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, gridinfo_t *grid) { SuperMatrix GA; /* global A */ doublecomplex *b_global, *xtrue_global; /* replicated on all processes */ int_t *rowind, *colptr; /* global */ doublecomplex *nzval; /* global */ doublecomplex *nzval_loc; /* local */ int_t *colind, *rowptr; /* local */ int_t m, n, nnz; int_t m_loc, fst_row, nnz_loc; int_t m_loc_fst; /* Record m_loc of the first p-1 processors, when mod(m, p) is not zero. */ int_t row, col, i, j, relpos; int iam; char trans[1]; int_t *marker; iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter zcreate_matrix()"); #endif if ( !iam ) { double t = SuperLU_timer_(); /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &nzval, &rowind, &colptr); printf("Time to read and distribute matrix %.2f\n", SuperLU_timer_() - t); fflush(stdout); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( nzval, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); /* Allocate storage for compressed column representation. 
*/ zallocateA_dist(n, nnz, &nzval, &rowind, &colptr); MPI_Bcast( nzval, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } #if 0 nzval[0].r = 0.1; nzval[0].i = 0.0; #endif /* Compute the number of rows to be distributed to local process */ m_loc = m / (grid->nprow * grid->npcol); m_loc_fst = m_loc; /* When m / procs is not an integer */ if ((m_loc * grid->nprow * grid->npcol) != m) { /*m_loc = m_loc+1; m_loc_fst = m_loc;*/ if (iam == (grid->nprow * grid->npcol - 1)) /* last proc. gets all*/ m_loc = m - m_loc * (grid->nprow * grid->npcol - 1); } /* Create compressed column matrix for GA. */ zCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if ( !(b_global = doublecomplexMalloc_dist(m*nrhs)) ) ABORT("Malloc fails for b[]"); if ( !(xtrue_global = doublecomplexMalloc_dist(n*nrhs)) ) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; zGenXtrue_dist(n, nrhs, xtrue_global, n); zFillRHS_dist(trans, nrhs, xtrue_global, n, &GA, b_global, m); /************************************************* * Change GA to a local A with NR_loc format * *************************************************/ rowptr = (int_t *) intMalloc_dist(m_loc+1); marker = (int_t *) intCalloc_dist(n); /* Get counts of each row of GA */ for (i = 0; i < n; ++i) for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]]; /* Set up row pointers */ rowptr[0] = 0; fst_row = iam * m_loc_fst; nnz_loc = 0; for (j = 0; j < m_loc; ++j) { row = fst_row + j; rowptr[j+1] = rowptr[j] + marker[row]; marker[j] = rowptr[j]; } nnz_loc = rowptr[m_loc]; nzval_loc = (doublecomplex *) doublecomplexMalloc_dist(nnz_loc); colind = (int_t *) intMalloc_dist(nnz_loc); /* Transfer the matrix into the compressed row storage */ for (i = 0; i < n; ++i) { for (j = colptr[i]; j < colptr[i+1]; ++j) { row = rowind[j]; if ( 
(row>=fst_row) && (row=2 ) if ( !iam ) zPrint_CompCol_Matrix_dist(&GA); #endif /* Destroy GA */ Destroy_CompCol_Matrix_dist(&GA); /******************************************************/ /* Change GA to a local A with NR_loc format */ /******************************************************/ /* Set up the local A in NR_loc format */ zCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, nzval_loc, colind, rowptr, SLU_NR_loc, SLU_Z, SLU_GE); /* Get the local B */ if ( !((*rhs) = doublecomplexMalloc_dist(m_loc*nrhs)) ) ABORT("Malloc fails for rhs[]"); for (j =0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) { row = fst_row + i; (*rhs)[j*m_loc+i] = b_global[j*n+row]; } } *ldb = m_loc; /* Set the true X */ *ldx = m_loc; if ( !((*x) = doublecomplexMalloc_dist(*ldx * nrhs)) ) ABORT("Malloc fails for x_loc[]"); /* Get the local part of xtrue_global */ for (j = 0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) (*x)[i + j*(*ldx)] = xtrue_global[i + fst_row + j*n]; } SUPERLU_FREE(b_global); SUPERLU_FREE(xtrue_global); SUPERLU_FREE(marker); #if ( DEBUGlevel>=1 ) printf("sizeof(NRforamt_loc) %lu\n", sizeof(NRformat_loc)); CHECK_MALLOC(iam, "Exit zcreate_matrix()"); #endif return 0; } SuperLU_DIST_5.3.0/EXAMPLE/dcreate_matrix.c0000644013363400111340000001544613233431301017100 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read the matrix from data file * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_ddefs.h" /* \brief * *
 * Purpose
 * =======
 * 
 * DCREATE_MATRIX reads the matrix from a data file in Harwell-Boeing format,
 * and distributes it to the processes in a distributed compressed row format.
 * It also generates the distributed true solution X and the right-hand
 * side RHS.
 *
 *
 * Arguments   
 * =========      
 *
 * A     (output) SuperMatrix*
 *       Local matrix A in NR_loc format. 
 *
 * NRHS  (input) int
 *       Number of right-hand sides.
 *
 * RHS   (output) double**
 *       The right-hand side matrix.
 *
 * LDB   (output) int*
 *       Leading dimension of the right-hand side matrix.
 *
 * X     (output) double**
 *       The true solution matrix.
 *
 * LDX   (output) int*
 *       The leading dimension of the true solution matrix.
 *
 * FP    (input) FILE*
 *       The matrix file pointer.
 *
 * GRID  (input) gridinfo_t*
 *       The 2D process mesh.
 * 
*/ int dcreate_matrix(SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, gridinfo_t *grid) { SuperMatrix GA; /* global A */ double *b_global, *xtrue_global; /* replicated on all processes */ int_t *rowind, *colptr; /* global */ double *nzval; /* global */ double *nzval_loc; /* local */ int_t *colind, *rowptr; /* local */ int_t m, n, nnz; int_t m_loc, fst_row, nnz_loc; int_t m_loc_fst; /* Record m_loc of the first p-1 processors, when mod(m, p) is not zero. */ int_t row, col, i, j, relpos; int iam; char trans[1]; int_t *marker; iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dcreate_matrix()"); #endif if ( !iam ) { double t = SuperLU_timer_(); /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &nzval, &rowind, &colptr); printf("Time to read and distribute matrix %.2f\n", SuperLU_timer_() - t); fflush(stdout); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( nzval, nnz, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); /* Allocate storage for compressed column representation. 
*/ dallocateA_dist(n, nnz, &nzval, &rowind, &colptr); MPI_Bcast( nzval, nnz, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } #if 0 nzval[0]=0.1; #endif /* Compute the number of rows to be distributed to local process */ m_loc = m / (grid->nprow * grid->npcol); m_loc_fst = m_loc; /* When m / procs is not an integer */ if ((m_loc * grid->nprow * grid->npcol) != m) { /*m_loc = m_loc+1; m_loc_fst = m_loc;*/ if (iam == (grid->nprow * grid->npcol - 1)) /* last proc. gets all*/ m_loc = m - m_loc * (grid->nprow * grid->npcol - 1); } /* Create compressed column matrix for GA. */ dCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if ( !(b_global = doubleMalloc_dist(m*nrhs)) ) ABORT("Malloc fails for b[]"); if ( !(xtrue_global = doubleMalloc_dist(n*nrhs)) ) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; dGenXtrue_dist(n, nrhs, xtrue_global, n); dFillRHS_dist(trans, nrhs, xtrue_global, n, &GA, b_global, m); /************************************************* * Change GA to a local A with NR_loc format * *************************************************/ rowptr = (int_t *) intMalloc_dist(m_loc+1); marker = (int_t *) intCalloc_dist(n); /* Get counts of each row of GA */ for (i = 0; i < n; ++i) for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]]; /* Set up row pointers */ rowptr[0] = 0; fst_row = iam * m_loc_fst; nnz_loc = 0; for (j = 0; j < m_loc; ++j) { row = fst_row + j; rowptr[j+1] = rowptr[j] + marker[row]; marker[j] = rowptr[j]; } nnz_loc = rowptr[m_loc]; nzval_loc = (double *) doubleMalloc_dist(nnz_loc); colind = (int_t *) intMalloc_dist(nnz_loc); /* Transfer the matrix into the compressed row storage */ for (i = 0; i < n; ++i) { for (j = colptr[i]; j < colptr[i+1]; ++j) { row = rowind[j]; if ( (row>=fst_row) && (row=2 ) if ( !iam ) dPrint_CompCol_Matrix_dist(&GA); #endif 
/* Destroy GA */ Destroy_CompCol_Matrix_dist(&GA); /******************************************************/ /* Change GA to a local A with NR_loc format */ /******************************************************/ /* Set up the local A in NR_loc format */ dCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, nzval_loc, colind, rowptr, SLU_NR_loc, SLU_D, SLU_GE); /* Get the local B */ if ( !((*rhs) = doubleMalloc_dist(m_loc*nrhs)) ) ABORT("Malloc fails for rhs[]"); for (j =0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) { row = fst_row + i; (*rhs)[j*m_loc+i] = b_global[j*n+row]; } } *ldb = m_loc; /* Set the true X */ *ldx = m_loc; if ( !((*x) = doubleMalloc_dist(*ldx * nrhs)) ) ABORT("Malloc fails for x_loc[]"); /* Get the local part of xtrue_global */ for (j = 0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) (*x)[i + j*(*ldx)] = xtrue_global[i + fst_row + j*n]; } SUPERLU_FREE(b_global); SUPERLU_FREE(xtrue_global); SUPERLU_FREE(marker); #if ( DEBUGlevel>=1 ) printf("sizeof(NRforamt_loc) %lu\n", sizeof(NRformat_loc)); CHECK_MALLOC(iam, "Exit dcreate_matrix()"); #endif return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pzdrive3_ABglobal.c0000644013363400111340000002372213233431301017372 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for pzgssvx_ABglobal example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * April 5, 2015
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pzdrive3_ABglobal.
 *
 * This example illustrates how to use pzgssvx_ABglobal to solve
 * systems repeatedly with the same sparsity pattern and similar
 * numerical values of matrix A.
 * In this case, the column permutation vector and symbolic factorization are
 * computed only once. The following data structures will be reused in the
 * subsequent call to pzgssvx_ABglobal:
 *        ScalePermstruct : DiagScale, R, C, perm_r, perm_c
 *        LUstruct        : etree, Glu_persist, Llu
 *
 * NOTE:
 * The distributed nonzero structures of L and U remain the same,
 * although the numerical values are different. So 'Llu' is set up once
 * in the first call to pzgssvx_ABglobal, and reused in the subsequent call.
 *
 * On an IBM SP, the program may be run by typing:
 *    poe pzdrive3_ABglobal -r <proc rows> -c <proc columns> <input_file> -procs <p>

*

*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; doublecomplex *a, *a1, *b, *b1, *xtrue; int_t *asub, *asub1, *xa, *xa1; int_t i, j, m, n, nnz; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp, *fopen(); extern int cpp_defs(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. 
------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. */ cpp_defs(); /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("Input matrix file: %s\n", *cpp); printf("\tDimension\t%dx%d\t # nonzeros %d\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); /* Allocate storage for compressed column representation. */ zallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } /* Create compressed column matrix for A. */ zCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doublecomplexMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doublecomplexMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; zGenXtrue_dist(n, nrhs, xtrue, ldx); zFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); /* Save a copy of the right-hand side. */ if ( !(b1 = doublecomplexMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]"); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* Save a copy of the matrix A. 
*/ zallocateA_dist(n, nnz, &a1, &asub1, &xa1); for (i = 0; i < nnz; ++i) { a1[i] = a[i]; asub1[i] = asub[i]; } for (i = 0; i < n+1; ++i) xa1[i] = xa[i]; /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { zinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid); } PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); /* Deallocate storage of matrix A. */ SUPERLU_FREE(b); /* Free storage of right-hand side. */ /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM. THE MATRIX A HAS THE SAME SPARSITY PATTERN AND THE SIMILAR NUMERICAL VALUES AS THAT IN A PREVIOUS SYSTEM. ------------------------------------------------------------*/ options.Fact = SamePattern_SameRowPerm; PStatInit(&stat); /* Initialize the statistics variables. */ /* Create compressed column matrix for A. */ zCreate_CompCol_Matrix_dist(&A, m, n, nnz, a1, asub1, xa1, SLU_NC, SLU_Z, SLU_GE); /* Solve the linear system. 
*/ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { printf("Solve a system with the same pattern and similar values.\n"); zinf_norm_error_dist(n, nrhs, b1, ldb, xtrue, ldx, &grid); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); /* Deallocate the structure of L and U.*/ SUPERLU_FREE(b1); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue); /* Free storage of the exact solution. */ SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/dcreate_matrix_perturbed.c0000644013363400111340000001547113233431301021152 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read the matrix from data file * *
 * -- Distributed SuperLU routine (version 5.1.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * December 31, 2016
 * 
*/ #include #include "superlu_ddefs.h" /* \brief * *
 * Purpose
 * =======
 * 
 * DCREATE_MATRIX_PERTURBED reads the matrix from a data file in
 * Harwell-Boeing format, and distributes it to the processes in a distributed
 * compressed row format. It also generates the distributed true solution X
 * and the right-hand side RHS.
 *
 * Arguments   
 * =========      
 *
 * A     (output) SuperMatrix*
 *       Local matrix A in NR_loc format. 
 *
 * NRHS  (input) int
 *       Number of right-hand sides.
 *
 * RHS   (output) double**
 *       The right-hand side matrix.
 *
 * LDB   (output) int*
 *       Leading dimension of the right-hand side matrix.
 *
 * X     (output) double**
 *       The true solution matrix.
 *
 * LDX   (output) int*
 *       The leading dimension of the true solution matrix.
 *
 * FP    (input) FILE*
 *       The matrix file pointer.
 *
 * GRID  (input) gridinfo_t*
 *       The 2D process mesh.
 * 
*/ int dcreate_matrix_perturbed(SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, gridinfo_t *grid) { SuperMatrix GA; /* global A */ double *b_global, *xtrue_global; /* replicated on all processes */ int_t *rowind, *colptr; /* global */ double *nzval; /* global */ double *nzval_loc; /* local */ int_t *colind, *rowptr; /* local */ int_t m, n, nnz; int_t m_loc, fst_row, nnz_loc; int_t m_loc_fst; /* Record m_loc of the first p-1 processors, when mod(m, p) is not zero. */ int_t row, col, i, j, relpos; int iam; char trans[1]; int_t *marker; iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dcreate_matrix()"); #endif if ( !iam ) { /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &nzval, &rowind, &colptr); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( nzval, nnz, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); /* Allocate storage for compressed column representation. */ dallocateA_dist(n, nnz, &nzval, &rowind, &colptr); MPI_Bcast( nzval, nnz, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } /* Perturbed the 1st and last diagonal of the matrix to lower values. Intention is to change perm_r[]. 
*/ nzval[0] *= 0.01; nzval[nnz-1] *= 0.0001; /* Compute the number of rows to be distributed to local process */ m_loc = m / (grid->nprow * grid->npcol); m_loc_fst = m_loc; /* When m / procs is not an integer */ if ((m_loc * grid->nprow * grid->npcol) != m) { /*m_loc = m_loc+1; m_loc_fst = m_loc;*/ if (iam == (grid->nprow * grid->npcol - 1)) /* last proc. gets all*/ m_loc = m - m_loc * (grid->nprow * grid->npcol - 1); } /* Create compressed column matrix for GA. */ dCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if ( !(b_global = doubleMalloc_dist(m*nrhs)) ) ABORT("Malloc fails for b[]"); if ( !(xtrue_global = doubleMalloc_dist(n*nrhs)) ) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; dGenXtrue_dist(n, nrhs, xtrue_global, n); dFillRHS_dist(trans, nrhs, xtrue_global, n, &GA, b_global, m); /************************************************* * Change GA to a local A with NR_loc format * *************************************************/ rowptr = (int_t *) intMalloc_dist(m_loc+1); marker = (int_t *) intCalloc_dist(n); /* Get counts of each row of GA */ for (i = 0; i < n; ++i) for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]]; /* Set up row pointers */ rowptr[0] = 0; fst_row = iam * m_loc_fst; nnz_loc = 0; for (j = 0; j < m_loc; ++j) { row = fst_row + j; rowptr[j+1] = rowptr[j] + marker[row]; marker[j] = rowptr[j]; } nnz_loc = rowptr[m_loc]; nzval_loc = (double *) doubleMalloc_dist(nnz_loc); colind = (int_t *) intMalloc_dist(nnz_loc); /* Transfer the matrix into the compressed row storage */ for (i = 0; i < n; ++i) { for (j = colptr[i]; j < colptr[i+1]; ++j) { row = rowind[j]; if ( (row>=fst_row) && (row=2 ) if ( !iam ) dPrint_CompCol_Matrix_dist(&GA); #endif /* Destroy GA */ Destroy_CompCol_Matrix_dist(&GA); /******************************************************/ /* Change GA to a local A with NR_loc format */ 
/******************************************************/ /* Set up the local A in NR_loc format */ dCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, nzval_loc, colind, rowptr, SLU_NR_loc, SLU_D, SLU_GE); /* Get the local B */ if ( !((*rhs) = doubleMalloc_dist(m_loc*nrhs)) ) ABORT("Malloc fails for rhs[]"); for (j =0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) { row = fst_row + i; (*rhs)[j*m_loc+i] = b_global[j*n+row]; } } *ldb = m_loc; /* Set the true X */ *ldx = m_loc; if ( !((*x) = doubleMalloc_dist(*ldx * nrhs)) ) ABORT("Malloc fails for x_loc[]"); /* Get the local part of xtrue_global */ for (j = 0; j < nrhs; ++j) { for (i = 0; i < m_loc; ++i) (*x)[i + j*(*ldx)] = xtrue_global[i + fst_row + j*n]; } SUPERLU_FREE(b_global); SUPERLU_FREE(xtrue_global); SUPERLU_FREE(marker); #if ( DEBUGlevel>=1 ) printf("sizeof(NRforamt_loc) %lu\n", sizeof(NRformat_loc)); CHECK_MALLOC(iam, "Exit dcreate_matrix()"); #endif return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pzgstrs_Bglobal_Bsend.c0000644013363400111340000007433413233431301020357 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Solves a system of distributed linear equations * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 * 
*/ #include "superlu_zdefs.h" #define ISEND_IRECV #define BSEND 1 /* * Function prototypes */ #ifdef _CRAY fortran void CTRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*); fortran void CGEMM(_fcd, _fcd, int*, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*, doublecomplex*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif static void gather_diag_to_all(int_t, int_t, doublecomplex [], Glu_persist_t *, LocalLU_t *, gridinfo_t *, int_t, int_t [], int_t [], doublecomplex [], int_t, doublecomplex []); /*! \brief * *
 * Purpose
 * =======
 *
 * pzgstrs_Bglobal solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by pzgstrf.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from pzgstrf for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_zdefs.h for the definition of 'LUstruct_t'.
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_zdefs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) doublecomplex*
 *        On entry, the right-hand side matrix of the possibly equilibrated
 *        and row permuted system.
 *        On exit, the solution matrix of the possibly equilibrated
 *        and row permuted system if info = 0;
 *
 *        NOTE: Currently, the N-by-NRHS  matrix B must reside on all 
 *              processes when calling this routine.
 *
 * ldb    (input) int (global)
 *        Leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 * 
*/ void pzgstrs_Bglobal(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid, doublecomplex *B, int_t ldb, int nrhs, SuperLUStat_t *stat, int *info) { Glu_persist_t *Glu_persist = LUstruct->Glu_persist; LocalLU_t *Llu = LUstruct->Llu; doublecomplex alpha = {1.0, 0.0}; doublecomplex zero = {0.0, 0.0}; doublecomplex *lsum; /* Local running sum of the updates to B-components */ doublecomplex *x; /* X component at step k. */ doublecomplex *lusup, *dest; doublecomplex *recvbuf, *tempv; doublecomplex *rtemp; /* Result of full matrix-vector multiply. */ int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr; int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */ Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */ int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */ int_t iam, kcol, krow, mycol, myrow; int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr; int_t nb, nlb, nub, nsupers; int_t *xsup, *lsub, *usub; int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/ int_t Pc, Pr; int knsupc, nsupr; int ldalsum; /* Number of lsum entries locally owned. */ int maxrecvsz, p, pi; int_t **Lrowind_bc_ptr; doublecomplex **Lnzval_bc_ptr; MPI_Status status; #if defined(ISEND_IRECV) || defined(BSEND) MPI_Request *send_req; int test_flag; #endif /*-- Counts used for L-solve --*/ int_t *fmod; /* Modification count for L-solve. */ int_t **fsendx_plist = Llu->fsendx_plist; int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */ int_t *frecv; /* Count of modifications to be recv'd from processes in this row. */ int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */ int_t nleaf = 0, nroot = 0; /*-- Counts used for U-solve --*/ int_t *bmod; /* Modification count for L-solve. */ int_t **bsendx_plist = Llu->bsendx_plist; int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */ int_t *brecv; /* Count of modifications to be recv'd from processes in this row. 
*/ int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. */ double t; #if ( DEBUGlevel>=2 ) int_t Ublocks = 0; #endif t = SuperLU_timer_(); /* Test input parameters. */ *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -9; if ( *info ) { pxerbla("PDGSTRS_BGLOBAL", grid, -*info); return; } /* * Initialization. */ iam = grid->iam; #ifdef BSEND if(!iam) { printf("Using MPI_Bsend in complex triangular solve\n"); fflush(stdout); } #endif Pc = grid->npcol; Pr = grid->nprow; myrow = MYROW( iam, grid ); mycol = MYCOL( iam, grid ); nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; Lrowind_bc_ptr = Llu->Lrowind_bc_ptr; Lnzval_bc_ptr = Llu->Lnzval_bc_ptr; nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */ stat->ops[SOLVE] = 0.0; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter pdgstrs_Bglobal()"); #endif /* Save the count to be altered so it can be used by subsequent call to PDGSTRS_BGLOBAL. */ if ( !(fmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for fmod[]."); for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i]; if ( !(frecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for frecv[]."); Llu->frecv = frecv; #if defined(ISEND_IRECV) || defined(BSEND) if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(Pr*sizeof(MPI_Request))) ) ABORT("Malloc fails for send_req[]."); for (i = 0; i < Pr; ++i) send_req[i] = MPI_REQUEST_NULL; #endif #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); #endif /* Obtain ilsum[] and ldalsum for process column 0. */ ilsum = Llu->ilsum; ldalsum = Llu->ldalsum; /* Allocate working storage. 
*/ knsupc = sp_ienv_dist(3); k = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H ); maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H ); if ( !(lsum = doublecomplexCalloc_dist(((size_t)ldalsum) * nrhs + nlb * LSUM_H)) ) ABORT("Calloc fails for lsum[]."); if ( !(x = doublecomplexMalloc_dist(((size_t)ldalsum) * nrhs + nlb * XK_H)) ) ABORT("Malloc fails for x[]."); if ( !(recvbuf = doublecomplexMalloc_dist(k)) ) ABORT("Malloc fails for recvbuf[]."); if ( !(rtemp = doublecomplexMalloc_dist(k)) ) ABORT("Malloc fails for rtemp[]."); /*--------------------------------------------------- * Forward solve Ly = b. *---------------------------------------------------*/ /* * Copy B into X on the diagonal processes. */ ii = 0; for (k = 0; k < nsupers; ++k) { knsupc = SuperSize( k ); krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ il = LSUM_BLK( lk ); lsum[il - LSUM_H].r = k;/* Block number prepended in the header. */ lsum[il - LSUM_H].i = 0; kcol = PCOL( k, grid ); if ( mycol == kcol ) { /* Diagonal process. */ jj = X_BLK( lk ); x[jj - XK_H].r = k; /* Block number prepended in the header. */ x[jj - XK_H].i = 0; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) /* X is stored in blocks. */ x[i + jj + j*knsupc] = B[i + ii + j*ldb]; } } ii += knsupc; } /* * Compute frecv[] and nfrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && fmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. 
*/ nfrecvmod += frecv[lk]; if ( !frecv[lk] && !fmod[lk] ) ++nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) frecv[%4d] %2d\n", iam, k, frecv[lk]); assert( frecv[lk] < Pc ); #endif } } } } /* --------------------------------------------------------- Solve the leaf nodes first by all the diagonal processes. --------------------------------------------------------- */ #if ( DEBUGlevel>=1 ) printf("(%2d) nleaf %4d\n", iam, nleaf); #endif for (k = 0; k < nsupers && nleaf; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); if ( frecv[lk]==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #else ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc - 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ --nleaf; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ for (p = 0; p < Pr; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[p]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req,stat); #ifdef ISEND_IRECV /* Wait for previous Isends to complete. */ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) /*MPI_Wait( &send_req[p], &status );*/ MPI_Test( &send_req[p], &test_flag, &status ); } #endif } } /* if diagonal process ... */ } /* for k ... */ /* ----------------------------------------------------------- Compute the internal nodes asynchronously by all processes. ----------------------------------------------------------- */ #if ( DEBUGlevel>=1 ) printf("(%2d) nfrecvx %4d, nfrecvmod %4d, nleaf %4d\n", iam, nfrecvx, nfrecvmod, nleaf); #endif while ( nfrecvx || nfrecvmod ) { /* While not finished. */ /* Receive a message. 
*/ #if 1 MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); #else /* -MPI- FATAL: Remote protocol queue full */ MPI_Irecv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &request ); MPI_Wait( &request, &status ); #endif k = (*recvbuf).r; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nfrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; if ( lsub ) { nb = lsub[0]; lptr = BC_HEADER; luptr = 0; knsupc = SuperSize( k ); /* * Perform local block modifications: lsum[i] -= L_i,k * X[k] */ zlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); } /* if lsub */ break; case LSUM: --nfrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. */ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) z_add(&x[i + ii + j*knsupc], &x[i + ii + j*knsupc], &tempv[i + j*knsupc]); if ( (--frecv[lk])==0 && fmod[lk]==0 ) { fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #else ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc - 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii-XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[p]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications. */ nb = lsub[0] - 1; lptr = BC_HEADER + LB_DESCRIPTOR + knsupc; luptr = knsupc; /* Skip diagonal block L(k,k). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k, fmod, nb, lptr, luptr, xsup, grid, Llu, send_req, stat); #ifdef ISEND_IRECV /* Wait for the previous Isends to complete. */ for (p = 0; p < Pr; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) MPI_Test( &send_req[p], &test_flag, &status ); } #endif } /* if */ break; #if ( DEBUGlevel>=1 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=2 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. L-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif #if ( PRNTlevel==2 ) printf("\n(%d) .. After L-solve: y =\n", iam); for (i = 0, k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process */ knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); for (j = 0; j < knsupc; ++j) printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]); } MPI_Barrier( grid->comm ); } #endif SUPERLU_FREE(fmod); SUPERLU_FREE(frecv); SUPERLU_FREE(rtemp); /* MPI_Barrier( grid->comm ); Drain messages in the forward solve. 
*/ /*--------------------------------------------------- * Back solve Ux = y. * * The Y components from the forward solve is already * on the diagonal processes. *---------------------------------------------------*/ /* Save the count to be altered so it can be used by subsequent call to PDGSTRS_BGLOBAL. */ if ( !(bmod = intMalloc_dist(nlb)) ) ABORT("Calloc fails for bmod[]."); for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i]; if ( !(brecv = intMalloc_dist(nlb)) ) ABORT("Malloc fails for brecv[]."); Llu->brecv = brecv; /* * Compute brecv[] and nbrecvmod counts on the diagonal processes. */ { superlu_scope_t *scp = &grid->rscp; for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { lk = LBi( k, grid ); /* Local block number. */ kcol = PCOL( k, grid ); /* Root process in this row scope. */ if ( mycol != kcol && bmod[lk] ) i = 1; /* Contribution from non-diagonal process. */ else i = 0; MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t, MPI_SUM, kcol, scp->comm ); if ( mycol == kcol ) { /* Diagonal process. */ nbrecvmod += brecv[lk]; if ( !brecv[lk] && !bmod[lk] ) ++nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) brecv[%4d] %2d\n", iam, k, brecv[lk]); assert( brecv[lk] < Pc ); #endif } } } } /* Re-initialize lsum to zero. Each block header is already in place. */ for (k = 0; k < nsupers; ++k) { krow = PROW( k, grid ); if ( myrow == krow ) { knsupc = SuperSize( k ); lk = LBi( k, grid ); il = LSUM_BLK( lk ); dest = &lsum[il]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = zero; } } /* Set up additional pointers for the index and value arrays of U. nlb is the number of local block rows. */ nub = CEILING( nsupers, Pc ); /* Number of local block columns. */ if ( !(Urbs = (int_t *) intCalloc_dist(2*((size_t)nub))) ) ABORT("Malloc fails for Urbs[]"); /* Record number of nonzero blocks in a block column. 
*/ Urbs1 = Urbs + nub; if ( !(Ucb_indptr = SUPERLU_MALLOC(nub * sizeof(Ucb_indptr_t *))) ) ABORT("Malloc fails for Ucb_indptr[]"); if ( !(Ucb_valptr = SUPERLU_MALLOC(nub * sizeof(int_t *))) ) ABORT("Malloc fails for Ucb_valptr[]"); /* Count number of row blocks in a block column. One pass of the skeleton graph of U. */ for (lk = 0; lk < nlb; ++lk) { usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ /* usub[0] -- number of column blocks in this block row. */ #if ( DEBUGlevel>=2 ) Ublocks += usub[0]; #endif i = BR_HEADER; /* Pointer in index array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number */ ++Urbs[LBj(k,grid)]; i += UB_DESCRIPTOR + SuperSize( k ); } } } /* Set up the vertical linked lists for the row blocks. One pass of the skeleton graph of U. */ for (lb = 0; lb < nub; ++lb) if ( Urbs[lb] ) { /* Not an empty block column. */ if ( !(Ucb_indptr[lb] = SUPERLU_MALLOC(Urbs[lb] * sizeof(Ucb_indptr_t))) ) ABORT("Malloc fails for Ucb_indptr[lb][]"); if ( !(Ucb_valptr[lb] = (int_t *) intMalloc_dist(Urbs[lb])) ) ABORT("Malloc fails for Ucb_valptr[lb][]"); } for (lk = 0; lk < nlb; ++lk) { /* For each block row. */ usub = Ufstnz_br_ptr[lk]; if ( usub ) { /* Not an empty block row. */ i = BR_HEADER; /* Pointer in index array. */ j = 0; /* Pointer in nzval array. */ for (lb = 0; lb < usub[0]; ++lb) { /* For all column blocks. */ k = usub[i]; /* Global block number, column-wise. */ ljb = LBj( k, grid ); /* Local block number, column-wise. */ Ucb_indptr[ljb][Urbs1[ljb]].lbnum = lk; Ucb_indptr[ljb][Urbs1[ljb]].indpos = i; Ucb_valptr[ljb][Urbs1[ljb]] = j; ++Urbs1[ljb]; j += usub[i+1]; i += UB_DESCRIPTOR + SuperSize( k ); } } } #if ( DEBUGlevel>=2 ) for (p = 0; p < Pr*Pc; ++p) { if (iam == p) { printf("(%2d) .. 
Ublocks %d\n", iam, Ublocks); for (lb = 0; lb < nub; ++lb) { printf("(%2d) Local col %2d: # row blocks %2d\n", iam, lb, Urbs[lb]); if ( Urbs[lb] ) { for (i = 0; i < Urbs[lb]; ++i) printf("(%2d) .. row blk %2d:\ lbnum %d, indpos %d, valpos %d\n", iam, i, Ucb_indptr[lb][i].lbnum, Ucb_indptr[lb][i].indpos, Ucb_valptr[lb][i]); } } } MPI_Barrier( grid->comm ); } for (p = 0; p < Pr*Pc; ++p) { if ( iam == p ) { printf("\n(%d) bsendx_plist[][]", iam); for (lb = 0; lb < nub; ++lb) { printf("\n(%d) .. local col %2d: ", iam, lb); for (i = 0; i < Pr; ++i) printf("%4d", bsendx_plist[lb][i]); } printf("\n"); } MPI_Barrier( grid->comm ); } #endif /* DEBUGlevel */ #if ( PRNTlevel>=3 ) t = SuperLU_timer_() - t; if ( !iam) printf(".. Setup U-solve time\t%8.2f\n", t); t = SuperLU_timer_(); #endif /* * Solve the roots first by all the diagonal processes. */ #if ( DEBUGlevel>=1 ) printf("(%2d) nroot %4d\n", iam, nroot); #endif for (k = nsupers-1; k >= 0 && nroot; --k) { krow = PROW( k, grid ); kcol = PCOL( k, grid ); if ( myrow == krow && mycol == kcol ) { /* Diagonal process. */ knsupc = SuperSize( k ); lk = LBi( k, grid ); /* Local block number, row-wise. */ if ( brecv[lk]==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ ii = X_BLK( lk ); lk = LBj( k, grid ); /* Local block number, column-wise */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #else ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc + 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ --nroot; #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. 
*/ for (p = 0; p < Pr; ++p) if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[p]); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications: lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) zlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); #ifdef ISEND_IRECV /* Wait for the previous Isends to complete. */ for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) MPI_Test( &send_req[p], &test_flag, &status ); } #endif } /* if root ... */ } /* if diagonal process ... */ } /* for k ... */ /* * Compute the internal nodes asynchronously by all processes. */ while ( nbrecvx || nbrecvmod ) { /* While not finished. */ /* Receive a message. */ MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status ); k = (*recvbuf).r; #if ( DEBUGlevel>=2 ) printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG); #endif switch ( status.MPI_TAG ) { case Xk: --nbrecvx; lk = LBj( k, grid ); /* Local block number, column-wise. */ /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ zlsum_bmod(lsum, x, &recvbuf[XK_H], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); break; case LSUM: --nbrecvmod; lk = LBi( k, grid ); /* Local block number, row-wise. 
*/ ii = X_BLK( lk ); knsupc = SuperSize( k ); tempv = &recvbuf[LSUM_H]; RHS_ITERATE(j) for (i = 0; i < knsupc; ++i) z_add(&x[i + ii + j*knsupc], &x[i + ii + j*knsupc], &tempv[i + j*knsupc]); if ( (--brecv[lk])==0 && bmod[lk]==0 ) { bmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Lrowind_bc_ptr[lk]; lusup = Lnzval_bc_ptr[lk]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #else ztrsm_("L", "U", "N", "N", &knsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &knsupc); #endif stat->ops[SOLVE] += 4 * knsupc * (knsupc + 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, k); #endif /* * Send Xk to process column Pc[k]. */ kcol = PCOL( k, grid ); for (p = 0; p < Pr; ++p) if ( bsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, kcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[p] ); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii - XK_H], pi); #endif } /* * Perform local block modifications: * lsum[i] -= U_i,k * X[k] */ if ( Urbs[lk] ) zlsum_bmod(lsum, x, &x[ii], nrhs, k, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); #ifdef ISEND_IRECV /* Wait for the previous Isends to complete. 
*/ for (p = 0; p < Pr; ++p) { if ( bsendx_plist[lk][p] != EMPTY ) /*MPI_Wait( &send_req[p], &status );*/ MPI_Test( &send_req[p], &test_flag, &status ); } #endif } /* if becomes solvable */ break; #if ( DEBUGlevel>=1 ) default: printf("(%2d) Recv'd wrong message tag %4d\n", status.MPI_TAG); break; #endif } /* switch */ } /* while not finished ... */ #if ( PRNTlevel>=3 ) t = SuperLU_timer_() - t; if ( !iam ) printf(".. U-solve time\t%8.2f\n", t); #endif /* Copy the solution X into B (on all processes). */ { int_t num_diag_procs, *diag_procs, *diag_len; doublecomplex *work; get_diag_procs(n, Glu_persist, grid, &num_diag_procs, &diag_procs, &diag_len); jj = diag_len[0]; for (j = 1; j < num_diag_procs; ++j) jj = SUPERLU_MAX(jj, diag_len[j]); if ( !(work = doublecomplexMalloc_dist(((size_t)jj)*nrhs)) ) ABORT("Malloc fails for work[]"); gather_diag_to_all(n, nrhs, x, Glu_persist, Llu, grid, num_diag_procs, diag_procs, diag_len, B, ldb, work); SUPERLU_FREE(diag_procs); SUPERLU_FREE(diag_len); SUPERLU_FREE(work); } /* Deallocate storage. */ SUPERLU_FREE(lsum); SUPERLU_FREE(x); SUPERLU_FREE(recvbuf); for (i = 0; i < nub; ++i) if ( Urbs[i] ) { SUPERLU_FREE(Ucb_indptr[i]); SUPERLU_FREE(Ucb_valptr[i]); } SUPERLU_FREE(Ucb_indptr); SUPERLU_FREE(Ucb_valptr); SUPERLU_FREE(Urbs); SUPERLU_FREE(bmod); SUPERLU_FREE(brecv); #ifdef ISEND_IRECV for (p = 0; p < Pr; ++p) { if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); } SUPERLU_FREE(send_req); #endif stat->utime[SOLVE] = SuperLU_timer_() - t; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit pdgstrs_Bglobal()"); #endif } /* PZGSTRS_BGLOBAL */ /*! \brief * *
 * Gather the components of x vector on the diagonal processes
 * onto all processes, and combine them into the global vector y.
 * 
*/ static void gather_diag_to_all(int_t n, int_t nrhs, doublecomplex x[], Glu_persist_t *Glu_persist, LocalLU_t *Llu, gridinfo_t *grid, int_t num_diag_procs, int_t diag_procs[], int_t diag_len[], doublecomplex y[], int_t ldy, doublecomplex work[]) { int_t i, ii, j, k, lk, lwork, nsupers, p; int_t *ilsum, *xsup; int iam, knsupc, pkk; doublecomplex *x_col, *y_col; iam = grid->iam; nsupers = Glu_persist->supno[n-1] + 1; xsup = Glu_persist->xsup; ilsum = Llu->ilsum; for (p = 0; p < num_diag_procs; ++p) { pkk = diag_procs[p]; if ( iam == pkk ) { /* Copy x vector into a buffer. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); lk = LBi( k, grid ); ii = X_BLK( lk ); /*ilsum[lk] + (lk+1)*XK_H;*/ x_col = &x[ii]; for (j = 0; j < nrhs; ++j) { for (i = 0; i < knsupc; ++i) work[i+lwork] = x_col[i]; lwork += knsupc; x_col += knsupc; } } MPI_Bcast( work, lwork, SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm ); } else { MPI_Bcast( work, diag_len[p]*nrhs, SuperLU_MPI_DOUBLE_COMPLEX, pkk, grid->comm ); } /* Scatter work[] into global y vector. */ lwork = 0; for (k = p; k < nsupers; k += num_diag_procs) { knsupc = SuperSize( k ); ii = FstBlockC( k ); y_col = &y[ii]; for (j = 0; j < nrhs; ++j) { for (i = 0; i < knsupc; ++i) y_col[i] = work[i+lwork]; lwork += knsupc; y_col += ldy; } } } } /* GATHER_DIAG_TO_ALL */ SuperLU_DIST_5.3.0/EXAMPLE/pzdrive4_ABglobal.c0000644013363400111340000002754513233431301017402 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief This example illustrates how to divide up the processes into subgroups * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * April 5, 2015
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pzdrive4_ABglobal.
 *
 * This example illustrates how to divide up the processes into
 * subgroups (multiple grids) such that each subgroup solves a linear
 * system independently from the other.
 *
 * In this example, there are 2 subgroups:
 *  1. subgroup 1 consists of processes 0 to 5 arranged as
 *     a 2-by-3 process grid.
 *  2. subgroup 2 consists of processes 6 to 9 arranged as
 *     a 2-by-2 process grid.
 *
 * On an IBM SP, the program may be run by typing
 *    poe pzdrive4_ABglobal  -procs 10
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid1, grid2; double *berr; doublecomplex *a, *b, *xtrue; int_t *asub, *xa; int_t i, j, m, n, nnz; int_t nprow, npcol, ldumap, p; int_t usermap[6]; int iam, info, ldb, ldx, nprocs; int nrhs = 1; /* Number of right-hand side. */ char trans[1]; char **cpp, c; FILE *fp, *fopen(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); if ( nprocs < 10 ) { fprintf(stderr, "Requires at least 10 processes\n"); exit(-1); } /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID 1. ------------------------------------------------------------*/ nprow = 2; npcol = 3; ldumap = 2; p = 0; /* Grid 1 starts from process 0. */ for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++; superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid1); /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID 2. 
------------------------------------------------------------*/ nprow = 2; npcol = 2; ldumap = 2; p = 6; /* Grid 2 starts from process 6. */ for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++; superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid2); /* Bail out if I do not belong in any of the 2 grids. */ MPI_Comm_rank( MPI_COMM_WORLD, &iam ); if ( iam >= 10 ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif if ( iam >= 0 && iam < 6 ) { /* I am in grid 1. */ iam = grid1.iam; /* Get the logical number in the new grid. */ /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("\tDimension\t%dx%d\t # nonzeros %d\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid1.nprow, (int) grid1.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid1.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid1.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid1.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid1.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid1.comm ); /* Allocate storage for compressed column representation. */ zallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid1.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid1.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid1.comm ); } /* Create compressed column matrix for A. 
*/ zCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doublecomplexMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doublecomplexMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; zGenXtrue_dist(n, nrhs, xtrue, ldx); zFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid1, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { zinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid1); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid1); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); Destroy_LU(n, &grid1, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); } else { /* I am in grid 2. 
*/ iam = grid2.iam; /* Get the logical number in the new grid. */ /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Read the matrix stored on disk in Harwell-Boeing format. */ zreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("\tDimension\t%dx%d\t # nonzeros %d\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid2.nprow, (int) grid2.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid2.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid2.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid2.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid2.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid2.comm ); /* Allocate storage for compressed column representation. */ zallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid2.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid2.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid2.comm ); } /* Create compressed column matrix for A. */ zCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doublecomplexMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doublecomplexMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; zGenXtrue_dist(n, nrhs, xtrue, ldx); zFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. 
------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = MMD_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pzgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid2, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { zinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid2); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid2); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); Destroy_LU(n, &grid2, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); } /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRIDS. ------------------------------------------------------------*/ superlu_gridexit(&grid1); superlu_gridexit(&grid2); out: /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } SuperLU_DIST_5.3.0/EXAMPLE/pddrive_ABglobal.c0000644013363400111340000001742713233431301017266 0ustar xiaoyessg/*! 
\file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for pdgssvx_ABglobal example * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pddrive_ABglobal.
 *
 * This example illustrates how to use pdgssvx_ABglobal with the full
 * (default) options to solve a linear system.
 * 
 * Five basic steps are required:
 *   1. Initialize the MPI environment and the SuperLU process grid
 *   2. Set up the input matrix and the right-hand side
 *   3. Set the options argument
 *   4. Call pdgssvx_ABglobal
 *   5. Release the process grid and terminate the MPI environment
 *
 * On an IBM SP, the program may be run by typing
 *    poe pddrive_ABglobal -r <proc rows> -c <proc columns> <input_file> -procs <p>

*

*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; double *a, *b, *xtrue; int_t *asub, *xa; int_t m, n, nnz; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp, *fopen(); extern int cpp_defs(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default " IFMT ")\n", nprow); printf("\t-c : process columns (default " IFMT ")\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( VAMPIR>=1 ) VT_traceoff(); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. 
------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. */ cpp_defs(); /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("Input matrix file: %s\n", *cpp); printf("\tDimension\t" IFMT "x" IFMT "\t # nonzeros " IFMT "\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); /* Allocate storage for compressed column representation. */ dallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } /* Create compressed column matrix for A. */ dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doubleMalloc_dist(m*nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; dGenXtrue_dist(n, nrhs, xtrue, ldx); dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. 
------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid); } PStatPrint(&options, &stat, &grid); /* Print the statistics. */ /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); Destroy_LU(n, &grid, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. 
CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/g20.rua0000644013363400111340000012041713233431301015035 0ustar xiaoyessgg20, symm permuted by SYMMMD SYM 530 26 120 384 0 RUA 400 400 1920 0 (16I5) (16I5) (5E15.8) (5E15.8) 1 6 11 16 21 26 31 36 41 46 51 56 61 66 71 76 81 86 91 96 101 106 111 116 121 126 131 136 141 146 151 156 161 166 171 176 181 186 191 196 201 206 210 214 218 223 228 231 235 239 243 248 253 257 262 266 271 276 281 286 291 295 300 304 309 313 318 323 328 332 337 342 347 352 357 362 367 372 377 382 387 392 397 402 407 412 417 422 427 432 437 442 447 452 457 462 467 472 477 482 487 492 497 501 505 510 514 519 524 529 534 539 544 549 554 559 564 568 572 576 580 583 588 593 597 601 606 610 614 619 624 629 634 639 644 649 654 659 664 669 674 678 682 686 691 696 701 706 710 714 718 723 728 732 737 742 747 752 757 762 767 772 777 781 786 791 796 800 805 810 815 820 825 830 835 839 844 849 854 859 864 869 874 879 884 889 894 899 904 909 914 919 923 928 932 937 941 946 951 956 961 966 971 976 981 986 991 996 1001 1006 1011 1015 1020 1024 1029 1033 1038 1043 1048 1053 1058 1063 1068 1073 1078 1083 1088 1093 1098 1103 1108 1113 1117 1122 1126 1130 1133 1138 1142 1147 1152 1156 1161 1165 1170 1174 1179 1183 1188 1193 1198 1203 1208 1212 1217 1221 1226 1230 1235 1240 1245 1249 1254 1259 1264 1269 1274 1279 1284 1288 1293 1298 1303 1308 1313 1317 1322 1327 1332 1337 1342 1347 1352 1357 1362 1367 1372 1377 1382 1387 1392 1397 1402 1407 1412 1417 1422 1427 1432 1437 1442 1446 1451 1455 1459 1464 1468 1472 1477 1482 1487 1492 1497 1501 1506 1510 1515 1519 1522 1526 1530 1535 1539 1544 1549 1554 1559 1563 1568 1573 1578 1583 1587 1592 1596 1601 1605 1610 
1615 1620 1625 1630 1635 1640 1645 1650 1655 1660 1665 1670 1674 1679 1684 1689 1694 1699 1704 1709 1714 1719 1724 1729 1734 1738 1743 1748 1753 1758 1763 1768 1773 1778 1783 1788 1792 1797 1802 1807 1811 1816 1821 1826 1831 1836 1841 1846 1851 1856 1861 1866 1871 1876 1881 1886 1891 1896 1901 1906 1911 1916 1921 1 9 32 391 395 2 9 392 395 400 3 8 389 393 394 4 8 9 392 394 5 7 8 381 389 6 7 8 9 32 5 6 7 33 382 3 4 5 6 8 1 2 4 6 9 10 31 396 398 399 11 18 29 30 31 12 18 31 397 399 13 17 386 387 388 14 17 18 30 387 15 17 383 388 390 16 17 18 390 397 13 14 15 16 17 11 12 14 16 18 19 28 33 380 382 20 22 28 29 33 21 22 29 31 398 20 21 22 32 391 23 27 379 384 385 24 27 28 379 380 25 27 30 385 387 26 27 28 29 30 23 24 25 26 27 19 20 24 26 28 11 20 21 26 29 11 14 25 26 30 10 11 12 21 31 1 6 22 32 33 7 19 20 32 33 34 74 92 370 372 35 40 70 174 176 36 40 91 174 177 37 39 40 70 71 38 39 40 90 91 37 38 39 74 92 35 36 37 38 40 41 56 70 71 73 42 44 55 56 43 44 175 176 42 43 44 45 44 45 56 70 176 46 54 55 56 73 47 49 53 48 49 54 55 47 48 49 52 50 51 53 69 50 51 52 68 72 49 51 52 53 54 47 50 52 53 46 48 52 54 72 42 46 48 55 41 42 45 46 56 57 59 67 68 72 58 59 67 74 372 57 58 59 71 73 60 66 67 372 373 61 62 65 69 61 62 66 67 68 63 64 65 374 63 64 66 371 373 61 63 65 66 60 62 64 65 66 57 58 60 62 67 51 57 62 68 69 50 61 68 69 35 37 41 45 70 37 41 59 71 74 51 54 57 72 73 41 46 59 72 73 34 39 58 71 74 75 89 369 381 389 76 78 92 370 375 77 78 89 369 375 76 77 78 88 90 79 87 91 177 178 80 87 88 90 91 81 86 87 173 178 82 86 173 379 380 83 85 86 87 88 84 85 86 380 382 83 84 85 89 381 81 82 83 84 86 79 80 81 83 87 78 80 83 88 89 75 77 85 88 89 38 78 80 90 92 36 38 79 80 91 34 39 76 90 92 93 172 385 386 387 94 98 168 169 170 95 98 169 172 386 96 98 109 170 171 97 98 109 386 388 94 95 96 97 98 99 108 377 383 390 100 108 376 377 378 101 107 109 167 171 102 107 109 383 388 103 106 107 167 104 106 108 378 105 106 107 108 383 103 104 105 106 101 102 103 105 107 99 100 104 105 108 96 97 101 102 
109 110 166 173 379 384 111 132 138 162 164 112 132 138 168 169 113 131 132 163 164 114 130 131 132 168 115 129 130 168 170 116 123 128 129 130 117 121 123 128 118 120 131 163 119 120 121 123 118 119 120 122 117 119 121 120 122 123 130 131 116 117 119 122 123 124 127 128 129 125 127 167 171 126 127 129 170 171 124 125 126 127 116 117 124 128 115 116 124 126 129 114 115 116 122 130 113 114 118 122 131 111 112 113 114 132 133 137 138 162 165 134 137 165 166 384 135 137 138 169 172 136 137 172 384 385 133 134 135 136 137 111 112 133 135 138 139 145 174 175 176 140 145 161 174 177 141 143 145 175 142 143 153 160 141 142 143 144 143 144 145 160 161 139 140 141 144 145 146 152 153 159 160 147 152 159 162 165 148 150 152 153 149 150 163 164 148 149 150 151 150 151 152 162 164 146 147 148 151 152 142 146 148 153 154 158 159 160 161 155 158 161 177 178 156 158 159 165 166 157 158 166 173 178 154 155 156 157 158 146 147 154 156 159 142 144 146 154 160 140 144 154 155 161 111 133 147 151 162 113 118 149 163 111 113 149 151 164 133 134 147 156 165 110 134 156 157 166 101 103 125 167 94 112 114 115 168 94 95 112 135 169 94 96 115 126 170 96 101 125 126 171 93 95 135 136 172 81 82 110 157 173 35 36 139 140 174 43 139 141 175 35 43 45 139 176 36 79 140 155 177 79 81 155 157 178 179 183 201 369 375 180 183 200 201 274 181 183 369 389 393 182 183 272 274 393 179 180 181 182 183 184 188 370 372 373 185 188 199 371 373 186 188 201 370 375 187 188 199 200 201 184 185 186 187 188 189 198 200 274 277 190 198 273 276 277 191 197 198 199 200 192 193 196 374 192 193 197 199 371 194 195 196 275 194 195 197 198 276 192 194 196 197 191 193 195 196 197 189 190 191 195 198 185 187 191 193 199 180 187 189 191 200 179 180 186 187 201 202 271 272 393 394 203 207 272 274 277 204 207 218 273 277 205 207 267 271 272 206 207 218 266 267 203 204 205 206 207 208 217 218 266 268 209 217 265 268 270 210 216 217 218 273 211 212 215 275 211 212 216 273 276 213 214 215 269 213 214 216 217 270 211 213 215 216 
210 212 214 215 216 208 209 210 214 217 204 206 208 210 218 219 223 271 392 394 220 223 229 267 271 221 223 365 392 400 222 223 229 365 366 219 220 221 222 223 224 228 229 266 267 225 228 263 266 268 226 228 229 264 366 227 228 262 263 264 224 225 226 227 228 220 222 224 226 229 230 249 263 265 268 231 239 248 249 265 232 233 238 269 232 233 239 265 270 234 236 237 238 235 236 237 247 234 235 236 234 235 237 239 248 232 234 238 239 231 233 237 238 239 240 246 249 262 263 241 242 245 247 241 242 246 248 249 243 244 245 261 243 244 246 260 262 241 243 245 246 240 242 244 245 246 235 241 247 248 231 237 242 247 248 230 231 240 242 249 250 259 264 364 366 251 259 260 262 264 252 258 259 364 367 253 254 257 261 253 254 258 259 260 255 256 257 363 255 256 258 367 368 253 255 257 258 252 254 256 257 258 250 251 252 254 259 244 251 254 260 261 243 253 260 261 227 240 244 251 262 225 227 230 240 263 226 227 250 251 264 209 230 231 233 265 206 208 224 225 266 205 206 220 224 267 208 209 225 230 268 213 232 269 270 209 214 233 269 270 202 205 219 220 271 182 202 203 205 272 190 204 210 212 273 180 182 189 203 274 194 211 275 276 190 195 212 275 276 189 190 203 204 277 278 283 289 395 400 279 283 289 361 362 280 282 283 391 395 281 282 283 359 361 280 281 282 396 398 278 279 280 281 283 284 288 289 365 400 285 288 364 365 366 286 288 289 360 362 287 288 360 364 367 284 285 286 287 288 278 279 284 286 289 290 292 298 396 399 291 292 298 355 357 290 291 292 358 359 293 297 377 390 397 294 297 298 397 399 295 297 356 376 377 296 297 298 355 356 293 294 295 296 297 290 291 294 296 298 299 328 351 355 357 300 308 328 355 356 301 307 326 327 302 307 308 326 328 303 306 376 378 304 306 307 308 305 306 308 356 376 303 304 305 306 301 302 304 307 300 302 304 305 308 309 311 326 328 351 310 311 324 326 327 309 310 311 325 353 312 323 325 352 353 313 314 322 350 313 314 323 352 354 315 316 320 322 315 316 321 323 325 317 319 324 327 318 319 320 317 318 319 321 315 318 320 321 316 319 320 
321 324 313 315 322 323 312 314 316 322 323 310 317 321 324 325 311 312 316 324 325 301 302 309 310 326 301 310 317 327 299 300 302 309 328 329 338 349 360 362 330 338 360 367 368 331 337 338 348 349 332 333 336 363 332 333 337 338 368 334 335 336 350 334 335 337 348 354 332 334 336 337 331 333 335 336 337 329 330 331 333 338 339 347 358 359 361 340 347 349 361 362 341 346 347 348 349 342 346 348 352 354 343 345 346 347 358 344 345 346 352 353 343 344 345 351 357 341 342 343 344 346 339 340 341 343 347 331 335 341 342 348 329 331 340 341 349 313 334 350 354 299 309 345 351 353 312 314 342 344 352 311 312 344 351 353 314 335 342 350 354 291 296 299 300 355 295 296 300 305 356 291 299 345 357 358 292 339 343 357 358 281 292 339 359 396 286 287 329 330 360 279 281 339 340 361 279 286 329 340 362 255 332 363 368 250 252 285 287 364 221 222 284 285 365 222 226 250 285 366 252 256 287 330 367 256 330 333 363 368 75 77 179 181 369 34 76 184 186 370 64 185 193 371 374 34 58 60 184 372 60 64 184 185 373 63 192 371 374 76 77 179 186 375 100 295 303 305 376 99 100 293 295 377 100 104 303 378 23 24 82 110 379 19 24 82 84 380 5 75 85 381 382 7 19 84 381 382 15 99 102 105 383 23 110 134 136 384 23 25 93 136 385 13 93 95 97 386 13 14 25 93 387 13 15 97 102 388 3 5 75 181 389 15 16 99 293 390 1 22 280 391 398 2 4 219 221 392 3 181 182 202 393 3 4 202 219 394 1 2 278 280 395 10 282 290 359 396 12 16 293 294 397 10 21 282 391 398 10 12 290 294 399 2 221 278 284 400 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 
-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 
4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 
4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 
-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 
-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00 
4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 
4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 
4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 
-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 
-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 
4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 
4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 
-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 
-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 -1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 
-1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00-1.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 -1.00000000E+00-1.00000000E+00-1.00000000E+00-1.00000000E+00 4.00000000E+00 SuperLU_DIST_5.3.0/EXAMPLE/big.rua0000644013363400111340000336316013233431301015215 0ustar xiaoyessg32-bit adder, from Steve Hamm (Motorola) hamm@austoto.sps.mot.com add32 11491 382 1493 7962 1654 RUA 4960 4960 23884 0 (13i6) (16i5) (3e26.18) (3e26.18) F 1 0 1 30 42 45 57 65 73 76 87 111 114 117 127 130 144 157 165 178 181 191 203 206 214 226 236 239 251 254 263 274 278 288 310 321 324 340 350 359 362 373 397 400 404 417 420 430 440 450 468 471 484 496 499 509 520 530 534 546 549 558 570 573 581 609 622 625 635 644 653 656 671 695 698 702 714 717 728 739 752 765 768 778 793 796 804 816 826 829 841 844 853 865 868 876 899 912 915 925 934 943 946 961 988 991 995 1009 1012 1022 1031 1043 1058 1061 1071 1087 1090 1098 1110 1121 1124 1136 1140 1149 1160 1164 1174 1202 1214 1218 1230 1239 1248 1251 1263 1288 1291 1294 1304 1307 1319 1330 1343 1356 1359 1371 1385 1388 1398 1409 1418 1422 1436 1440 1449 1461 1464 1472 1501 1512 1516 1532 1542 1551 1554 1565 1590 1593 1596 1607 1611 1623 1634 1646 1660 1663 1675 1689 1692 1700 1712 1722 1725 1737 1740 1749 1761 1764 1772 1801 
1812 1816 1832 1842 1851 1854 1865 1891 1894 1897 1910 1913 1923 1932 1945 1959 1962 1974 1990 1993 2003 2014 2024 2028 2039 2043 2052 2064 2067 2075 2101 2113 2117 2131 2140 2149 2152 2163 2190 2193 2197 2209 2212 2224 2234 2244 2258 2261 2271 2286 2289 2297 2309 2319 2322 2334 2337 2346 2358 2361 2369 2399 2411 2414 2425 2434 2443 2446 2459 2483 2486 2490 2504 2507 2517 2526 2536 2552 2555 2565 2581 2584 2592 2604 2614 2617 2629 2632 2641 2653 2656 2664 2696 2708 2711 2722 2731 2740 2743 2755 2780 2783 2787 2799 2802 2813 2827 2839 2852 2855 2868 2879 2882 2890 2902 2912 2915 2927 2930 2939 2951 2954 2962 2992 3004 3007 3018 3027 3036 3039 3051 3078 3081 3085 3096 3099 3114 3126 3139 3150 3153 3165 3177 3180 3188 3200 3210 3213 3225 3228 3237 3248 3252 3262 3292 3304 3307 3318 3327 3336 3339 3351 3377 3380 3384 3395 3398 3412 3425 3437 3449 3452 3465 3476 3479 3487 3499 3509 3512 3524 3527 3536 3548 3551 3559 3586 3598 3601 3614 3623 3632 3635 3647 3673 3676 3680 3691 3694 3709 3722 3735 3746 3749 3761 3773 3776 3784 3796 3806 3809 3821 3824 3833 3845 3848 3856 3886 3898 3901 3914 3923 3932 3935 3946 3973 3976 3980 3991 3994 4006 4019 4031 4044 4047 4057 4070 4073 4081 4093 4103 4106 4118 4121 4130 4142 4145 4153 4180 4193 4196 4206 4215 4224 4227 4241 4267 4270 4274 4287 4290 4300 4312 4322 4338 4341 4353 4365 4368 4376 4388 4398 4401 4413 4416 4425 4436 4440 4450 4476 4489 4492 4502 4511 4520 4523 4537 4566 4569 4573 4584 4587 4602 4614 4626 4638 4641 4651 4664 4667 4677 4688 4697 4701 4712 4715 4724 4736 4739 4747 4779 4792 4795 4805 4814 4823 4826 4841 4864 4867 4871 4882 4885 4897 4908 4920 4934 4937 4948 4962 4965 4973 4985 4995 4998 5010 5013 5022 5034 5037 5045 5070 5082 5085 5096 5105 5114 5117 5137 5164 5167 5171 5183 5186 5200 5213 5225 5237 5240 5252 5264 5267 5275 5287 5297 5300 5312 5315 5324 5336 5339 5347 5376 5389 5392 5402 5411 5420 5423 5437 5463 5466 5470 5481 5484 5498 5512 5526 5537 5540 5553 5564 5567 5575 5587 5597 5600 5612 5615 5624 5636 
5639 5647 5674 5687 5690 5700 5709 5718 5721 5735 5759 5762 5766 5777 5780 5792 5805 5815 5829 5832 5843 5856 5859 5867 5879 5889 5894 5906 5909 5918 5930 5935 5943 5970 5982 5985 5997 6007 6016 6019 6033 6056 6059 6062 6072 6075 6088 6098 6111 6125 6129 6142 6156 6160 6169 6179 6189 6193 6205 6208 6217 6228 6233 6240 6264 6275 6278 6294 6304 6313 6316 6327 6351 6354 6357 6367 6370 6383 6393 6406 6420 6424 6437 6451 6455 6463 6475 6485 6490 6502 6505 6514 6526 6531 6539 6563 6574 6577 6596 6605 6613 6616 6627 6652 6655 6658 6671 6674 6684 6693 6706 6721 6725 6737 6754 6758 6765 6776 6786 6791 6803 6806 6815 6825 6829 6838 6867 6879 6882 6897 6907 6916 6919 6931 6955 6958 6961 6972 6975 6987 6997 7010 7023 7027 7039 7054 7058 7066 7078 7088 7093 7105 7108 7117 7129 7134 7142 7168 7179 7182 7197 7207 7216 7219 7230 7255 7258 7261 7271 7274 7287 7297 7309 7324 7328 7341 7354 7357 7364 7374 7383 7388 7400 7403 7412 7423 7428 7435 7465 7478 7481 7491 7500 7509 7512 7527 7551 7554 7557 7567 7570 7583 7593 7604 7618 7622 7633 7648 7652 7663 7673 7683 7687 7699 7702 7711 7722 7728 7737 7765 7778 7781 7791 7800 7809 7812 7827 7851 7854 7857 7867 7870 7883 7893 7903 7917 7920 7930 7945 7948 7957 7968 7978 7984 7996 7999 8008 8019 8025 8034 8061 8074 8077 8087 8096 8105 8108 8123 8150 8154 8158 8171 8174 8184 8196 8206 8222 8225 8236 8249 8253 8262 8273 8283 8289 8301 8304 8313 8323 8327 8338 8369 8381 8384 8395 8404 8413 8416 8429 8451 8454 8458 8469 8472 8484 8495 8505 8520 8523 8534 8548 8551 8560 8571 8581 8587 8599 8602 8611 8622 8628 8637 8663 8676 8679 8689 8698 8707 8710 8724 8750 8754 8758 8769 8772 8784 8795 8805 8820 8823 8834 8848 8852 8861 8872 8882 8888 8900 8903 8912 8922 8926 8937 8959 8971 8974 8986 8995 9004 9007 9018 9044 9048 9052 9063 9066 9079 9089 9099 9113 9116 9127 9142 9146 9155 9166 9176 9182 9194 9197 9206 9216 9220 9231 9234 9244 9247 9259 9269 9278 9281 9292 9317 9320 9323 9333 9336 9352 9362 9376 9387 9391 9404 9417 9421 9432 9442 9452 9456 9468 
9471 9480 9491 9497 9506 9510 9513 9518 9521 9525 9532 9537 9540 9542 9545 9548 9550 9554 9557 9559 9562 9566 9569 9574 9577 9582 9585 9590 9596 9599 9601 9605 9608 9611 9615 9619 9622 9626 9629 9633 9636 9638 9641 9645 9648 9651 9653 9655 9658 9662 9665 9669 9672 9677 9680 9684 9691 9695 9698 9700 9703 9707 9710 9713 9715 9719 9722 9726 9729 9734 9737 9742 9748 9752 9755 9757 9760 9763 9765 9769 9772 9775 9779 9783 9786 9790 9793 9798 9801 9805 9813 9817 9820 9824 9827 9830 9834 9838 9841 9845 9848 9852 9855 9858 9862 9866 9869 9872 9874 9876 9879 9883 9886 9890 9893 9898 9901 9905 9912 9916 9919 9921 9924 9928 9931 9934 9936 9940 9943 9947 9950 9955 9958 9962 9970 9974 9977 9979 9982 9985 9987 9991 9994 9997 10001 10005 10008 10012 10015 10020 10023 10027 10034 10038 10041 10045 10048 10051 10055 10059 10062 10066 10069 10073 10076 10079 10083 10087 10090 10093 10095 10097 10100 10104 10107 10111 10114 10119 10122 10126 10133 10137 10140 10142 10145 10149 10152 10155 10157 10161 10164 10168 10171 10176 10179 10183 10190 10194 10197 10199 10202 10205 10207 10211 10214 10217 10221 10225 10228 10232 10235 10240 10243 10247 10256 10260 10263 10267 10270 10273 10277 10281 10284 10288 10291 10295 10298 10301 10305 10309 10312 10315 10317 10319 10322 10326 10329 10333 10336 10340 10343 10347 10353 10357 10360 10362 10365 10369 10372 10375 10377 10381 10384 10388 10391 10395 10398 10402 10408 10412 10415 10417 10420 10423 10425 10429 10432 10435 10439 10443 10446 10451 10454 10459 10462 10467 10473 10477 10480 10484 10487 10490 10494 10498 10501 10505 10508 10512 10515 10518 10522 10526 10529 10532 10534 10536 10539 10543 10546 10550 10553 10558 10561 10565 10570 10574 10577 10579 10582 10586 10589 10592 10594 10598 10601 10605 10608 10612 10615 10620 10625 10629 10632 10634 10637 10640 10642 10646 10649 10652 10656 10660 10663 10668 10671 10675 10678 10682 10688 10692 10695 10699 10702 10705 10709 10713 10716 10720 10723 10727 10730 10733 10737 10741 10744 10747 10749 
10751 10754 10758 10761 10765 10768 10773 10776 10780 10787 10791 10794 10796 10799 10803 10806 10809 10811 10815 10818 10822 10825 10829 10832 10837 10842 10846 10849 10851 10854 10857 10859 10863 10866 10869 10873 10877 10880 10885 10888 10893 10896 10900 10909 10913 10916 10920 10923 10926 10930 10934 10937 10941 10944 10948 10951 10954 10958 10962 10965 10968 10970 10972 10975 10979 10982 10986 10989 10993 10996 11000 11007 11011 11014 11016 11019 11023 11026 11029 11031 11035 11038 11042 11045 11049 11052 11056 11062 11066 11069 11071 11074 11077 11079 11083 11086 11089 11093 11097 11100 11104 11107 11112 11115 11119 11126 11130 11133 11137 11140 11143 11147 11151 11154 11158 11161 11165 11168 11171 11175 11179 11182 11185 11187 11189 11192 11196 11199 11203 11206 11211 11214 11218 11225 11229 11232 11234 11237 11241 11244 11247 11249 11253 11256 11260 11263 11268 11271 11275 11282 11286 11289 11291 11294 11297 11299 11303 11306 11309 11313 11317 11320 11324 11327 11332 11335 11339 11348 11352 11355 11359 11362 11365 11369 11373 11376 11380 11383 11387 11390 11393 11397 11401 11404 11407 11409 11411 11414 11418 11421 11425 11428 11433 11436 11440 11447 11451 11454 11456 11459 11463 11466 11469 11471 11475 11478 11482 11485 11490 11493 11497 11504 11508 11511 11513 11516 11519 11521 11525 11528 11531 11535 11539 11542 11546 11549 11554 11557 11561 11568 11572 11575 11579 11582 11585 11589 11593 11596 11600 11603 11607 11610 11613 11617 11621 11624 11627 11629 11631 11634 11638 11641 11645 11648 11653 11656 11660 11667 11671 11674 11676 11679 11683 11686 11689 11691 11695 11698 11702 11705 11710 11713 11717 11724 11728 11731 11733 11736 11739 11741 11745 11748 11751 11755 11759 11762 11766 11769 11774 11777 11782 11788 11792 11795 11799 11802 11805 11809 11813 11816 11820 11823 11827 11830 11833 11837 11841 11844 11847 11849 11851 11854 11858 11861 11865 11868 11873 11876 11880 11887 11891 11894 11896 11899 11903 11906 11909 11911 11915 11918 11922 11925 11930 
11933 11937 11944 11948 11951 11953 11956 11959 11961 11965 11968 11971 11975 11979 11982 11986 11989 11994 11997 12002 12008 12012 12015 12019 12022 12025 12029 12033 12036 12040 12043 12047 12050 12053 12057 12061 12064 12067 12069 12071 12074 12078 12081 12085 12088 12093 12096 12100 12107 12111 12114 12116 12119 12123 12126 12129 12131 12135 12138 12142 12145 12150 12153 12157 12164 12168 12171 12173 12176 12179 12181 12185 12188 12191 12195 12199 12202 12206 12209 12214 12217 12222 12228 12232 12235 12239 12242 12245 12249 12253 12256 12260 12263 12267 12270 12273 12277 12281 12284 12287 12289 12291 12294 12298 12301 12305 12308 12313 12316 12320 12327 12331 12334 12336 12339 12343 12346 12349 12351 12355 12358 12362 12365 12370 12373 12377 12384 12388 12391 12393 12396 12399 12401 12405 12408 12411 12415 12419 12422 12426 12429 12434 12437 12442 12448 12452 12455 12459 12462 12465 12469 12473 12476 12480 12483 12487 12490 12493 12497 12501 12504 12507 12509 12511 12514 12518 12521 12525 12528 12533 12536 12540 12547 12551 12554 12556 12559 12563 12566 12569 12571 12575 12578 12582 12585 12590 12593 12597 12605 12609 12612 12614 12617 12620 12622 12626 12629 12632 12636 12640 12643 12647 12650 12655 12658 12662 12670 12674 12677 12681 12684 12687 12691 12695 12698 12702 12705 12709 12712 12715 12719 12723 12726 12729 12731 12733 12736 12740 12743 12747 12750 12755 12758 12762 12769 12773 12776 12778 12781 12785 12788 12791 12793 12797 12800 12804 12807 12812 12815 12819 12827 12831 12834 12836 12839 12842 12844 12848 12851 12854 12858 12862 12865 12869 12872 12877 12880 12885 12891 12895 12898 12902 12905 12908 12912 12916 12919 12923 12926 12930 12933 12936 12940 12944 12947 12950 12952 12954 12957 12961 12964 12968 12971 12976 12979 12983 12991 12995 12998 13000 13003 13007 13010 13013 13015 13019 13022 13026 13029 13034 13037 13041 13049 13053 13056 13058 13061 13064 13066 13070 13073 13076 13080 13084 13087 13091 13094 13099 13102 13107 13113 13117 13120 
13124 13127 13130 13134 13138 13141 13145 13148 13152 13155 13158 13162 13166 13169 13172 13174 13176 13179 13183 13186 13190 13193 13198 13201 13205 13212 13216 13219 13221 13224 13228 13231 13234 13236 13240 13243 13247 13250 13255 13258 13262 13269 13273 13276 13278 13281 13284 13286 13290 13293 13296 13300 13304 13307 13311 13314 13319 13322 13326 13333 13337 13340 13344 13347 13350 13354 13358 13361 13365 13368 13372 13375 13378 13382 13386 13389 13392 13394 13396 13399 13403 13406 13410 13413 13418 13421 13425 13432 13436 13439 13441 13444 13448 13451 13454 13456 13460 13463 13467 13470 13475 13478 13482 13490 13494 13497 13499 13502 13505 13507 13511 13514 13517 13521 13525 13528 13532 13535 13540 13543 13548 13554 13558 13561 13565 13568 13571 13575 13579 13582 13586 13589 13593 13596 13599 13603 13607 13610 13613 13615 13617 13620 13624 13627 13631 13634 13639 13642 13646 13653 13657 13660 13662 13665 13669 13672 13675 13677 13681 13684 13688 13691 13696 13699 13703 13710 13714 13717 13719 13722 13725 13727 13731 13734 13737 13741 13745 13748 13752 13755 13760 13763 13768 13774 13778 13781 13785 13788 13791 13795 13799 13802 13806 13809 13813 13816 13819 13823 13827 13830 13833 13835 13837 13840 13844 13847 13851 13854 13859 13862 13866 13873 13877 13880 13882 13885 13889 13892 13895 13897 13901 13904 13908 13911 13916 13919 13924 13930 13934 13937 13939 13942 13945 13947 13951 13954 13957 13961 13965 13968 13973 13976 13981 13984 13989 13995 13999 14002 14006 14009 14012 14016 14020 14023 14027 14030 14034 14037 14040 14044 14048 14051 14054 14056 14058 14061 14066 14069 14073 14076 14081 14084 14088 14095 14099 14102 14104 14107 14112 14115 14118 14120 14124 14127 14131 14134 14139 14142 14147 14153 14157 14160 14162 14165 14168 14170 14174 14177 14180 14184 14188 14191 14196 14199 14204 14207 14212 14218 14222 14225 14229 14232 14235 14239 14243 14246 14250 14253 14257 14260 14263 14267 14271 14274 14277 14279 14281 14284 14288 14291 14295 14298 14303 
14306 14310 14317 14321 14324 14326 14329 14333 14336 14339 14341 14345 14348 14352 14355 14360 14363 14368 14374 14379 14382 14384 14387 14390 14392 14396 14399 14402 14406 14410 14413 14418 14421 14426 14429 14433 14442 14446 14449 14453 14456 14459 14463 14467 14470 14474 14477 14481 14484 14487 14491 14495 14498 14501 14503 14505 14508 14513 14516 14520 14523 14528 14531 14535 14542 14546 14549 14551 14554 14559 14562 14565 14567 14571 14574 14578 14581 14586 14589 14594 14600 14604 14607 14609 14612 14615 14617 14621 14624 14627 14631 14635 14638 14643 14646 14651 14654 14658 14665 14669 14672 14676 14679 14682 14686 14690 14693 14697 14700 14704 14707 14710 14714 14718 14721 14724 14726 14728 14731 14735 14738 14742 14745 14750 14753 14757 14764 14768 14771 14773 14776 14780 14783 14786 14788 14792 14795 14799 14802 14807 14810 14815 14821 14825 14828 14830 14833 14836 14838 14842 14845 14848 14852 14856 14859 14864 14867 14872 14875 14880 14886 14890 14893 14897 14900 14903 14907 14911 14914 14918 14921 14925 14928 14931 14935 14939 14942 14945 14947 14949 14952 14957 14960 14964 14967 14972 14975 14979 14986 14990 14993 14995 14998 15003 15006 15009 15011 15016 15019 15023 15026 15031 15034 15038 15046 15050 15053 15055 15058 15061 15063 15067 15070 15073 15077 15081 15084 15089 15092 15097 15100 15105 15111 15115 15118 15122 15125 15128 15132 15136 15139 15143 15146 15150 15153 15156 15160 15164 15167 15170 15172 15174 15177 15181 15184 15188 15191 15196 15199 15203 15210 15214 15217 15219 15222 15226 15229 15232 15234 15238 15241 15245 15248 15253 15256 15260 15268 15272 15275 15277 15280 15283 15285 15289 15292 15295 15299 15303 15306 15311 15314 15319 15322 15327 15333 15337 15340 15344 15347 15350 15354 15358 15361 15365 15368 15372 15375 15378 15382 15386 15389 15392 15394 15396 15399 15403 15406 15410 15413 15418 15421 15425 15432 15436 15439 15441 15444 15448 15451 15454 15456 15460 15463 15467 15470 15475 15478 15482 15490 15494 15497 15499 15502 
15505 15507 15511 15514 15517 15521 15525 15528 15532 15535 15540 15543 15547 15555 15559 15562 15566 15569 15572 15576 15580 15583 15587 15590 15594 15597 15600 15604 15608 15611 15614 15616 15618 15621 15625 15628 15632 15635 15640 15643 15647 15654 15658 15661 15663 15666 15670 15673 15676 15678 15682 15685 15689 15692 15697 15700 15704 15711 15715 15718 15720 15723 15726 15728 15732 15735 15738 15742 15746 15749 15753 15756 15761 15764 15769 15775 15779 15782 15786 15789 15792 15796 15800 15803 15807 15810 15814 15817 15820 15824 15828 15831 15834 15836 15838 15841 15845 15848 15852 15855 15860 15863 15867 15874 15878 15881 15883 15886 15890 15893 15896 15898 15902 15905 15909 15912 15917 15920 15924 15932 15936 15939 15941 15944 15947 15949 15953 15956 15959 15963 15967 15970 15974 15977 15982 15985 15990 15996 16000 16003 16007 16010 16013 16017 16021 16024 16028 16031 16035 16038 16041 16045 16049 16052 16055 16057 16059 16062 16066 16069 16073 16076 16081 16084 16088 16095 16099 16102 16104 16107 16111 16114 16117 16119 16123 16126 16130 16133 16138 16141 16145 16152 16156 16159 16161 16164 16167 16169 16173 16176 16179 16183 16187 16190 16194 16197 16202 16205 16210 16216 16220 16223 16227 16230 16233 16237 16241 16244 16248 16251 16255 16258 16261 16265 16269 16272 16275 16277 16279 16282 16286 16289 16293 16296 16301 16304 16308 16315 16319 16322 16324 16327 16331 16334 16337 16339 16343 16346 16351 16354 16359 16362 16367 16373 16377 16380 16382 16385 16388 16390 16394 16397 16400 16404 16408 16411 16416 16419 16424 16427 16432 16438 16442 16445 16449 16452 16455 16459 16463 16466 16470 16473 16477 16480 16483 16487 16491 16494 16497 16499 16501 16504 16508 16511 16515 16518 16523 16526 16530 16537 16541 16544 16546 16549 16553 16556 16559 16561 16565 16568 16572 16575 16579 16582 16585 16589 16594 16597 16600 16603 16606 16608 16612 16615 16620 16623 16628 16631 16635 16638 16641 16645 16649 16652 16656 16662 16668 16671 16674 16676 16680 16683 16686 
16691 16697 16700 16704 16707 16711 16714 16717 16722 16725 16727 16731 16734 16738 16741 16744 16748 16752 16755 16758 16763 16767 16770 16773 16775 16781 16784 16787 16793 16797 16800 16804 16807 16810 16814 16818 16821 16824 16827 16830 16832 16836 16839 16844 16847 16851 16854 16858 16861 16864 16868 16872 16875 16879 16887 16892 16895 16899 16902 16906 16909 16914 16920 16925 16928 16932 16935 16939 16942 16945 16950 16953 16955 16959 16962 16966 16969 16972 16976 16980 16983 16986 16991 16997 17000 17003 17005 17009 17012 17017 17023 17027 17030 17034 17037 17040 17044 17048 17051 17054 17057 17060 17062 17066 17069 17074 17077 17081 17084 17088 17091 17094 17098 17102 17105 17110 17116 17121 17124 17128 17131 17135 17138 17142 17150 17155 17158 17162 17165 17169 17172 17175 17180 17183 17185 17189 17192 17196 17199 17202 17206 17210 17213 17216 17221 17227 17230 17233 17235 17241 17244 17248 17255 17259 17262 17266 17269 17272 17276 17280 17283 17286 17289 17292 17294 17298 17301 17306 17309 17313 17316 17320 17323 17326 17330 17334 17337 17342 17348 17353 17356 17360 17363 17367 17370 17374 17382 17387 17390 17394 17397 17401 17404 17407 17412 17415 17417 17421 17424 17428 17431 17434 17438 17442 17445 17448 17453 17457 17460 17463 17465 17471 17474 17479 17485 17489 17492 17496 17499 17502 17506 17510 17513 17516 17519 17522 17524 17528 17531 17536 17539 17544 17547 17551 17554 17557 17561 17565 17568 17573 17579 17584 17587 17591 17594 17598 17601 17606 17612 17617 17620 17624 17627 17631 17634 17637 17642 17645 17647 17651 17654 17658 17661 17664 17668 17672 17675 17678 17683 17689 17692 17695 17697 17701 17704 17709 17715 17719 17722 17726 17729 17732 17736 17740 17743 17746 17749 17752 17754 17758 17761 17766 17769 17774 17777 17781 17784 17787 17791 17795 17798 17802 17809 17814 17817 17821 17824 17828 17831 17836 17842 17847 17850 17854 17857 17861 17864 17867 17872 17875 17877 17881 17884 17888 17891 17894 17898 17902 17905 17908 17913 17919 17922 
17925 17927 17933 17936 17941 17947 17951 17954 17958 17961 17964 17968 17972 17975 17978 17981 17984 17986 17990 17993 17998 18001 18006 18009 18013 18016 18019 18023 18027 18030 18035 18041 18046 18049 18053 18056 18060 18063 18068 18074 18079 18082 18086 18089 18093 18096 18099 18104 18107 18109 18113 18116 18121 18124 18127 18131 18135 18138 18141 18146 18152 18155 18158 18160 18164 18167 18172 18178 18182 18185 18189 18192 18195 18199 18203 18206 18209 18212 18215 18217 18221 18224 18229 18232 18236 18239 18243 18246 18249 18253 18257 18260 18264 18272 18277 18280 18284 18287 18291 18294 18298 18306 18311 18314 18318 18321 18325 18328 18331 18336 18339 18341 18345 18348 18352 18355 18358 18362 18366 18369 18372 18377 18383 18386 18389 18391 18397 18400 18405 18411 18415 18418 18422 18425 18428 18432 18436 18439 18442 18445 18448 18450 18454 18457 18462 18465 18469 18472 18476 18479 18482 18486 18490 18493 18497 18505 18510 18513 18517 18520 18524 18527 18531 18539 18544 18547 18551 18554 18558 18561 18564 18569 18572 18574 18578 18581 18585 18588 18591 18595 18599 18602 18605 18610 18616 18619 18622 18624 18630 18633 18637 18644 18648 18651 18655 18658 18661 18665 18669 18672 18675 18678 18681 18683 18687 18690 18695 18698 18702 18705 18709 18712 18715 18719 18723 18726 18731 18737 18742 18745 18749 18752 18756 18759 18764 18770 18775 18778 18782 18785 18789 18792 18795 18800 18803 18805 18809 18812 18816 18819 18822 18826 18830 18833 18836 18841 18847 18850 18853 18855 18861 18864 18869 18875 18879 18882 18886 18889 18892 18896 18900 18903 18906 18909 18912 18914 18918 18921 18926 18929 18933 18936 18940 18943 18946 18950 18954 18957 18962 18968 18973 18976 18980 18983 18987 18990 18995 19001 19006 19009 19013 19016 19020 19023 19026 19031 19034 19036 19040 19043 19047 19050 19053 19057 19061 19064 19067 19072 19076 19079 19082 19084 19090 19093 19098 19104 19108 19111 19115 19118 19121 19125 19129 19132 19135 19138 19141 19143 19147 19150 19155 19158 19162 
19165 19169 19172 19175 19179 19183 19186 19191 19197 19202 19205 19209 19212 19216 19219 19224 19230 19235 19238 19242 19245 19249 19252 19255 19260 19263 19265 19269 19272 19276 19279 19282 19286 19290 19293 19296 19301 19307 19310 19313 19315 19321 19324 19329 19335 19339 19342 19346 19349 19352 19356 19360 19363 19366 19369 19372 19374 19378 19381 19386 19389 19393 19396 19400 19403 19406 19410 19414 19417 19422 19428 19433 19436 19440 19443 19447 19450 19454 19461 19466 19469 19473 19476 19480 19483 19486 19491 19494 19496 19500 19503 19507 19510 19513 19517 19521 19524 19527 19532 19538 19541 19544 19546 19552 19555 19559 19566 19570 19573 19577 19580 19583 19587 19591 19594 19597 19600 19603 19605 19609 19612 19617 19620 19624 19627 19631 19634 19637 19641 19645 19648 19653 19659 19664 19667 19671 19674 19678 19681 19685 19693 19698 19701 19705 19708 19712 19715 19718 19723 19726 19728 19732 19735 19739 19742 19745 19749 19753 19756 19759 19764 19770 19773 19776 19778 19784 19787 19792 19798 19802 19805 19809 19812 19815 19819 19823 19826 19829 19832 19835 19837 19841 19844 19849 19852 19856 19859 19863 19866 19869 19873 19877 19880 19884 19892 19897 19900 19904 19907 19911 19914 19919 19925 19930 19933 19937 19940 19944 19947 19950 19955 19958 19960 19964 19967 19971 19974 19977 19981 19985 19988 19991 19996 20000 20003 20006 20008 20014 20017 20022 20028 20032 20035 20039 20042 20045 20049 20053 20056 20059 20062 20065 20067 20071 20074 20079 20082 20086 20089 20093 20096 20099 20103 20107 20110 20114 20121 20126 20129 20133 20136 20140 20143 20147 20155 20160 20163 20167 20170 20174 20177 20180 20185 20188 20190 20194 20197 20202 20205 20208 20212 20216 20219 20222 20227 20233 20236 20239 20241 20245 20248 20253 20259 20263 20266 20270 20273 20276 20280 20284 20287 20290 20293 20296 20298 20302 20305 20310 20313 20317 20320 20324 20327 20330 20334 20338 20341 20346 20352 20357 20360 20364 20367 20371 20374 20378 20385 20390 20393 20397 20400 20404 20407 
20410 20415 20418 20420 20424 20427 20431 20434 20437 20441 20445 20448 20451 20456 20462 20465 20468 20470 20476 20479 20483 20490 20494 20497 20501 20504 20507 20511 20515 20518 20521 20524 20527 20529 20533 20536 20541 20544 20548 20551 20555 20558 20561 20565 20569 20572 20576 20583 20588 20591 20595 20598 20602 20605 20609 20616 20621 20624 20628 20631 20635 20638 20641 20646 20649 20651 20655 20658 20662 20665 20668 20672 20676 20679 20682 20687 20693 20696 20699 20701 20707 20710 20714 20721 20725 20728 20732 20735 20738 20742 20746 20749 20752 20755 20758 20760 20764 20767 20772 20775 20779 20782 20786 20789 20792 20796 20800 20803 20808 20814 20819 20822 20826 20829 20833 20836 20841 20847 20852 20855 20859 20862 20866 20869 20872 20877 20880 20882 20886 20889 20893 20896 20899 20903 20907 20910 20913 20918 20924 20927 20930 20932 20938 20941 20946 20952 20956 20959 20963 20966 20969 20973 20977 20980 20983 20986 20989 20991 20995 20998 21003 21006 21010 21013 21017 21020 21023 21027 21031 21034 21038 21046 21051 21054 21058 21061 21065 21068 21072 21079 21084 21087 21091 21094 21098 21101 21105 21108 21111 21113 21117 21120 21124 21127 21130 21134 21138 21141 21145 21148 21153 21156 21159 21161 21166 21169 21173 21180 21184 21187 21191 21194 21197 21201 21205 21208 21211 21214 21217 21219 21223 21226 21231 21234 21239 21242 21246 21249 21252 21256 21260 21263 21268 21273 21277 21280 21284 21287 21291 21294 21298 21304 21308 21311 21315 21318 21322 21325 21328 21334 21337 21339 21343 21346 21350 21353 21356 21360 21364 21367 21371 21374 21380 21383 21386 21388 21392 21395 21399 21406 21410 21413 21417 21420 21423 21427 21431 21434 21437 21440 21443 21445 21449 21452 21457 21460 21465 21468 21472 21475 21478 21482 21486 21489 21494 21499 21503 21506 21510 21513 21517 21520 21524 21530 21534 21537 21541 21544 21548 21551 21555 21558 21561 21563 21567 21570 21574 21577 21580 21584 21588 21591 21595 21598 21603 21606 21609 21611 21616 21619 21624 21630 21634 
21637 21641 21644 21647 21651 21656 21659 21662 21665 21668 21670 21674 21677 21682 21685 21690 21693 21697 21700 21703 21707 21711 21714 21718 21724 21728 21731 21735 21738 21742 21745 21749 21755 21759 21762 21766 21769 21773 21776 21780 21783 21786 21788 21792 21795 21799 21802 21805 21809 21813 21816 21819 21825 21829 21832 21835 21837 21843 21846 21851 21857 21861 21864 21868 21871 21874 21878 21882 21885 21888 21891 21894 21896 21900 21903 21908 21911 21916 21919 21923 21926 21929 21933 21937 21940 21945 21950 21954 21957 21961 21964 21968 21971 21975 21981 21985 21988 21992 21995 21999 22002 22006 22009 22012 22014 22018 22021 22025 22028 22031 22035 22039 22042 22046 22049 22054 22057 22060 22062 22067 22070 22074 22081 22085 22088 22092 22095 22098 22102 22106 22109 22112 22115 22118 22120 22124 22127 22132 22135 22140 22143 22147 22150 22153 22157 22161 22164 22168 22174 22178 22181 22185 22188 22192 22195 22200 22206 22211 22214 22218 22221 22226 22229 22233 22236 22239 22241 22245 22248 22252 22255 22258 22262 22266 22269 22273 22276 22282 22285 22288 22290 22296 22299 22304 22310 22314 22317 22321 22324 22327 22331 22335 22338 22341 22344 22347 22349 22353 22356 22361 22364 22369 22372 22376 22379 22382 22386 22390 22393 22397 22404 22408 22411 22415 22418 22422 22425 22429 22436 22440 22443 22447 22450 22454 22457 22461 22465 22468 22470 22474 22477 22481 22484 22487 22491 22495 22498 22502 22505 22509 22512 22515 22517 22521 22524 22528 22535 22539 22542 22546 22549 22552 22556 22560 22563 22566 22569 22572 22574 22578 22581 22586 22589 22594 22597 22601 22604 22607 22611 22615 22618 22622 22630 22635 22638 22642 22645 22649 22652 22656 22664 22669 22672 22676 22679 22683 22686 22690 22693 22696 22698 22702 22705 22709 22712 22715 22719 22723 22726 22730 22733 22737 22740 22743 22745 22749 22752 22756 22763 22767 22770 22774 22777 22780 22784 22788 22791 22794 22797 22800 22802 22806 22809 22813 22816 22820 22823 22827 22830 22833 22837 22841 22844 
22848 22856 22861 22864 22868 22871 22875 22878 22882 22889 22893 22896 22900 22903 22907 22910 22914 22917 22920 22922 22926 22929 22933 22936 22939 22943 22947 22950 22954 22958 22962 22965 22968 22970 22974 22977 22981 22987 22991 22994 22998 23001 23004 23008 23012 23015 23018 23021 23024 23026 23030 23033 23038 23041 23045 23048 23052 23055 23058 23062 23066 23069 23073 23081 23086 23089 23093 23096 23100 23103 23107 23114 23119 23122 23126 23129 23133 23136 23140 23143 23146 23148 23152 23155 23159 23162 23165 23169 23173 23176 23180 23183 23187 23190 23193 23195 23199 23202 23206 23213 23217 23220 23224 23227 23230 23234 23238 23241 23244 23247 23250 23252 23256 23259 23263 23266 23270 23273 23277 23280 23283 23287 23291 23294 23298 23306 23311 23314 23318 23321 23325 23328 23332 23339 23343 23346 23350 23353 23357 23360 23364 23367 23370 23372 23376 23379 23383 23386 23389 23393 23397 23400 23404 23408 23412 23415 23418 23420 23424 23427 23432 23437 23441 23444 23448 23451 23454 23458 23462 23465 23468 23471 23474 23476 23480 23483 23487 23490 23494 23497 23501 23504 23507 23511 23515 23518 23522 23530 23535 23538 23542 23545 23549 23552 23556 23563 23567 23570 23574 23577 23581 23584 23588 23591 23594 23596 23600 23603 23607 23610 23613 23617 23621 23624 23628 23632 23636 23639 23642 23644 23648 23651 23656 23661 23666 23669 23673 23676 23679 23683 23687 23690 23693 23696 23699 23701 23705 23708 23713 23716 23721 23724 23728 23731 23734 23738 23742 23745 23750 23755 23759 23762 23766 23769 23773 23776 23781 23786 23790 23793 23797 23800 23804 23807 23811 23815 23818 23820 23824 23827 23831 23834 23837 23841 23845 23848 23852 23855 23859 23862 23865 23867 23871 23874 23879 23885 1 2 4 9 14 15 35 39 40 47 48 50 51 63 993 1069 1070 1079 1080 1091 1092 2977 3064 3067 3068 3071 3072 3099 3100 1 2 4 5 993 994 997 998 2977 2978 2979 2982 3 995 998 1 2 4 8 9 995 996 998 1005 2979 2980 2989 2 5 997 998 999 2981 2982 2983 6 999 1000 1002 1003 2983 2984 2985 7 2986 
2987 4 8 9 1005 1006 1008 1009 2989 2990 3037 3038 1 4 8 9 14 15 17 19 26 1009 1010 1021 1022 1025 1026 1039 2991 2992 3001 3002 3013 3014 3021 3038 10 2991 3038 11 1011 2993 12 14 15 1011 1012 1015 2993 2994 2995 2998 13 1013 1016 1 9 12 14 15 17 1013 1014 1015 1016 1019 2995 2996 2999 1 9 12 14 15 19 20 1015 1016 1027 2997 2998 3007 16 17 1017 1023 1024 3003 3004 3005 9 14 16 17 1019 1020 1022 1023 2999 3000 3001 3002 3003 18 3002 3003 9 15 19 20 1025 1031 1032 3011 3012 3013 15 19 20 1027 1028 1030 1031 3007 3008 3009 3010 3011 21 3010 3011 22 23 1033 1037 1038 3019 3035 3036 22 23 24 1036 1037 1053 1054 3015 3016 3017 3018 3035 23 24 26 1041 1042 1044 1053 3015 3023 3024 25 3018 3035 9 24 26 28 1039 1040 1043 1044 3021 3022 3023 3026 27 1041 1044 26 28 29 1043 1044 1045 3025 3026 3027 28 29 31 1045 1046 1048 1049 3027 3028 3029 3030 30 31 3030 3031 29 30 31 1049 1050 1051 3030 3031 3032 3033 32 33 35 63 70 71 78 82 1055 1131 1132 1141 1142 1153 1154 3039 3129 3130 3133 3134 3161 3162 32 33 35 36 1055 1056 1059 3039 3040 3041 3044 34 1057 1060 1 32 33 35 36 39 40 63 1057 1058 1059 1060 1067 3041 3042 3051 33 35 36 37 1059 1060 1061 3043 3044 3045 36 37 1061 1062 1064 1065 3045 3046 3047 38 3048 3049 1 35 39 40 1067 1068 1070 1071 3051 3052 3099 1 35 39 40 48 50 57 63 1071 1072 1083 1084 1087 1088 1101 3053 3054 3063 3064 3075 3076 3083 3099 3100 41 3053 3100 42 43 1073 3055 42 43 45 46 48 1073 1074 1077 1078 3055 3056 3057 3060 44 1075 1078 43 45 48 1075 1076 1078 1081 3057 3058 3061 43 46 48 51 1077 1078 1089 3059 3060 3069 1 47 48 1079 1085 1086 3064 3065 3066 3067 1 40 43 45 46 47 48 50 51 1078 1081 1082 1084 1085 3061 3062 3063 3064 49 3064 3065 1 40 48 50 51 1087 1093 1094 3071 3072 3073 3074 3075 1 46 48 50 51 1089 1090 1092 1093 3069 3070 3071 52 3072 3073 53 54 56 1095 1099 1100 3080 3081 3097 3098 53 54 55 1098 1099 1115 1116 3077 3078 3079 3080 54 55 57 1103 1104 1106 1115 3077 3085 3086 53 56 3080 3097 40 55 57 59 1101 1102 1105 1106 3083 3084 3085 
3088 58 1103 1106 57 59 60 1105 1106 1107 3087 3088 3089 59 60 62 1107 1108 1110 1111 3089 3090 3091 3092 3093 61 3092 3093 60 62 1111 1112 1113 3093 3094 3095 1 32 35 40 63 64 70 101 102 109 110 113 156 280 1117 1193 1194 1203 1204 1215 1216 3101 3191 3192 3195 3196 3223 3224 63 64 66 67 70 1117 1118 1121 1122 3101 3102 3103 3106 65 1119 1122 64 66 70 1119 1120 1122 1129 3103 3104 3113 64 67 68 1121 1122 1123 3105 3106 3107 67 68 1123 1124 1126 1127 3107 3108 3109 69 3110 3111 32 63 64 66 70 71 1122 1129 1130 1132 1133 3113 3114 3161 3162 32 70 71 78 79 81 82 88 1133 1134 1145 1146 1149 1150 1163 3115 3116 3125 3126 3134 3137 3138 3145 3162 72 3115 3162 73 74 1135 3117 73 74 76 77 1135 1136 1139 1140 3117 3118 3119 3122 75 1137 1140 74 76 77 79 1137 1138 1140 1143 3119 3120 3123 74 76 77 79 82 1139 1140 1151 3121 3122 3131 32 71 78 79 82 1141 1147 1148 3125 3126 3127 3128 3129 71 76 77 78 79 82 1143 1144 1146 1147 3123 3124 3125 80 3126 3127 71 81 82 1149 1155 1156 3134 3135 3136 3137 32 71 77 78 79 81 82 1151 1152 1154 1155 3131 3132 3133 3134 83 3134 3135 84 85 1157 1161 1162 3143 3159 3160 84 85 86 1160 1161 1177 1178 3139 3140 3141 3142 3159 85 86 88 1165 1166 1168 1177 3139 3147 3148 87 3142 3159 71 86 88 90 1163 1164 1167 1168 3145 3146 3147 3150 89 1165 1168 88 90 91 1167 1168 1169 3149 3150 3151 90 91 93 1169 1170 1172 1173 3151 3152 3153 3154 3155 92 3154 3155 91 93 1173 1174 1175 3155 3156 3157 94 95 101 125 132 133 140 143 144 1179 1255 1256 1265 1266 1277 1278 3163 3253 3254 3257 3258 3285 3286 94 95 97 98 101 1179 1180 1183 1184 3163 3164 3165 3168 96 1181 1184 95 97 101 1181 1182 1184 1191 3165 3166 3175 95 98 99 1183 1184 1185 3167 3168 3169 98 99 1185 1186 1188 1189 3169 3170 3171 100 3172 3173 63 94 95 97 101 102 125 156 1191 1192 1194 1195 3175 3176 3223 63 101 102 109 110 112 113 119 156 280 1195 1196 1207 1208 1211 1212 1225 3177 3178 3187 3188 3196 3199 3200 3207 3223 3224 103 3177 3224 104 105 1197 3179 104 105 107 108 110 113 1197 1198 1201 
1202 3179 3180 3181 3184 106 1199 1202 105 107 110 1199 1200 1202 1205 3181 3182 3185 105 108 113 1201 1202 1213 3183 3184 3193 63 102 109 110 1203 1209 1210 3187 3188 3189 3190 3191 63 102 105 107 109 110 113 1202 1205 1206 1208 1209 3185 3186 3187 111 3188 3189 102 112 113 1211 1217 1218 3196 3197 3198 3199 63 102 105 108 110 112 113 1202 1213 1214 1216 1217 3193 3194 3195 3196 114 3196 3197 115 116 1219 1223 1224 3205 3221 3222 115 116 117 1222 1223 1239 1240 3201 3202 3203 3204 3221 116 117 119 120 1227 1228 1230 1239 3201 3209 3210 118 3204 3221 102 117 119 121 1225 1226 1229 1230 3207 3208 3209 3212 117 120 1227 1230 119 121 122 1229 1230 1231 3211 3212 3213 121 122 124 1231 1232 1234 1235 3213 3214 3215 3216 123 124 3216 3217 122 123 124 1235 1236 1237 3216 3217 3218 3219 94 101 125 126 128 132 133 156 159 163 164 171 174 175 1241 1317 1318 1327 1328 1339 1340 3225 3315 3316 3319 3320 3347 3348 125 126 128 129 1241 1242 1245 1246 3225 3226 3227 3230 127 128 1243 1246 125 126 127 128 132 1243 1244 1246 1253 3227 3228 3237 126 129 130 1245 1246 1247 3229 3230 3231 129 130 1247 1248 1250 1251 3231 3232 3233 131 3234 3235 94 125 128 132 133 1253 1254 1256 1257 3237 3238 3285 94 125 132 133 140 141 143 144 150 1257 1258 1269 1270 1273 1274 1287 3239 3240 3249 3250 3261 3262 3269 3285 3286 134 3239 3286 135 1259 3241 136 138 139 1259 1260 1263 3241 3242 3243 3246 137 1261 1264 136 138 139 141 1261 1262 1263 1264 1267 3243 3244 3247 136 138 139 141 144 1263 1264 1275 3245 3246 3255 94 133 140 141 144 1265 1271 1272 3249 3250 3251 3252 3253 133 138 139 140 141 144 1267 1268 1270 1271 3247 3248 3249 142 3250 3251 94 133 143 144 1273 1279 1280 3257 3258 3259 3260 3261 94 133 139 140 141 143 144 1275 1276 1278 1279 3255 3256 3257 145 3258 3259 146 147 149 1281 1285 1286 3266 3267 3283 3284 146 147 148 1284 1285 1301 1302 3263 3264 3265 3266 147 148 150 1289 1290 1301 3263 3271 3272 146 149 3266 3283 133 148 150 151 152 1287 1288 1289 1291 1292 3269 3270 3271 3274 150 
151 1289 1292 150 152 153 1291 1292 1293 3273 3274 3275 152 153 155 1293 1294 1296 1297 3275 3276 3277 3278 3279 154 3278 3279 153 155 1297 1298 1299 3279 3280 3281 63 101 102 125 156 157 159 187 190 194 195 202 205 206 280 1303 1379 1380 1389 1390 1401 1402 3287 3377 3378 3381 3382 3409 3410 156 157 159 160 1303 1304 1307 3287 3288 3289 3292 158 159 1305 1308 125 156 157 158 159 160 163 164 1305 1306 1307 1308 1315 3289 3290 3299 157 159 160 161 1307 1308 1309 3291 3292 3293 160 161 1309 1310 1312 1313 3293 3294 3295 162 3296 3297 125 159 163 164 1315 1316 1318 1319 3299 3300 3347 125 159 163 164 171 172 174 175 181 1319 1320 1331 1332 1335 1336 1349 3301 3302 3311 3312 3323 3324 3331 3347 3348 165 3301 3348 166 1321 3303 167 169 170 1321 1322 1325 1326 3303 3304 3305 3308 168 169 1323 1326 167 168 169 170 172 1323 1324 1326 1329 3305 3306 3309 167 169 170 172 175 1325 1326 1337 3307 3308 3317 125 164 171 172 175 1327 1333 1334 3312 3313 3314 3315 164 169 170 171 172 175 1329 1330 1332 1333 3309 3310 3311 3312 173 3312 3313 125 164 174 175 1335 1341 1342 3319 3320 3321 3322 3323 125 164 170 171 172 174 175 1337 1338 1340 1341 3317 3318 3319 176 3320 3321 177 178 1343 1347 1348 3329 3345 3346 177 178 179 1346 1347 1363 1364 3325 3326 3327 3328 3345 178 179 181 1351 1352 1354 1363 3325 3333 3334 180 3328 3345 164 179 181 183 1349 1350 1353 1354 3331 3332 3333 3336 182 1351 1354 181 183 184 1353 1354 1355 3335 3336 3337 183 184 186 1355 1356 1358 1359 3337 3338 3339 3340 3341 185 3340 3341 184 186 1359 1360 1361 3341 3342 3343 156 187 188 190 195 218 221 225 226 233 234 237 249 280 1365 1441 1442 1451 1452 1463 1464 3349 3436 3439 3440 3443 3444 3471 3472 187 188 190 191 1365 1366 1369 3349 3350 3351 3354 189 190 1367 1370 156 187 188 189 190 191 194 195 1367 1368 1369 1370 1377 3351 3352 3361 188 190 191 192 1369 1370 1371 3353 3354 3355 191 192 1371 1372 1374 1375 3355 3356 3357 193 3358 3359 156 190 194 195 1377 1378 1380 1381 3361 3362 3409 156 187 190 194 195 
202 203 205 206 212 1381 1382 1393 1394 1397 1398 1411 3363 3364 3373 3374 3385 3386 3393 3409 3410 196 3363 3410 197 1383 3365 198 200 201 203 206 1383 1384 1387 1388 3365 3366 3367 3370 199 1385 1388 198 200 203 1385 1386 1388 1391 3367 3368 3371 198 201 206 1387 1388 1399 3369 3370 3379 156 195 202 203 206 1389 1395 1396 3373 3374 3375 3376 3377 195 198 200 202 203 206 1388 1391 1392 1394 1395 3371 3372 3373 204 3374 3375 156 195 205 206 1397 1403 1404 3381 3382 3383 3384 3385 156 195 198 201 202 203 205 206 1388 1399 1400 1402 1403 3379 3380 3381 207 3382 3383 208 209 211 1405 1409 1410 3390 3391 3407 3408 208 209 210 1408 1409 1425 1426 3387 3388 3389 3390 209 210 213 1413 1414 1416 1425 3387 3395 3396 208 211 3390 3407 195 212 214 1411 1412 1415 1416 3393 3394 3395 3398 210 213 1413 1416 212 214 215 1415 1416 1417 3397 3398 3399 214 215 217 1417 1418 1420 1421 3399 3400 3401 3402 3403 216 3402 3403 215 217 1421 1422 1423 3403 3404 3405 187 218 219 221 226 249 256 257 264 265 268 1427 1503 1504 1513 1514 1525 1526 3411 3498 3501 3502 3505 3506 3533 3534 218 219 221 222 1427 1428 1431 1432 3411 3412 3413 3416 220 221 1429 1432 187 218 219 220 221 225 226 1429 1430 1432 1439 3413 3414 3423 219 222 223 1431 1432 1433 3415 3416 3417 222 223 1433 1434 1436 1437 3417 3418 3419 224 3420 3421 187 221 225 226 1439 1440 1442 1443 3423 3424 3471 187 218 221 225 226 234 236 237 243 249 1443 1444 1455 1456 1459 1460 1473 3425 3426 3435 3436 3444 3447 3448 3455 3471 3472 227 3425 3472 228 229 1445 3427 228 229 231 232 1445 1446 1449 1450 3427 3428 3429 3432 230 1447 1450 229 231 232 234 237 1447 1448 1450 1453 3429 3430 3433 229 231 232 237 1449 1450 1461 3431 3432 3441 187 233 234 1451 1457 1458 3436 3437 3438 3439 187 226 231 233 234 237 1453 1454 1456 1457 3433 3434 3435 3436 235 3436 3437 226 236 237 1459 1465 1466 3444 3445 3446 3447 187 226 231 232 234 236 237 1461 1462 1464 1465 3441 3442 3443 3444 238 3444 3445 239 240 1467 1471 1472 3453 3469 3470 239 240 241 1470 
1471 1487 1488 3449 3450 3451 3452 3469 240 241 243 1475 1476 1478 1487 3449 3457 3458 242 3452 3469 226 241 243 245 1473 1474 1477 1478 3455 3456 3457 3460 244 1475 1478 243 245 246 1477 1478 1479 3459 3460 3461 245 246 248 1479 1480 1482 1483 3461 3462 3463 3464 3465 247 3464 3465 246 248 1483 1484 1485 3465 3466 3467 187 218 226 249 250 252 256 257 280 287 288 294 295 296 298 299 1489 1565 1566 1575 1576 1587 1588 3473 3563 3564 3567 3568 3595 3596 249 250 252 253 1489 1490 1493 1494 3473 3474 3475 3478 251 1491 1494 249 250 252 256 1491 1492 1494 1501 3475 3476 3485 250 253 254 1493 1494 1495 3477 3478 3479 253 254 1495 1496 1498 1499 3479 3480 3481 255 3482 3483 218 249 252 256 257 1501 1502 1504 1505 3485 3486 3533 3534 218 249 256 257 265 267 268 274 1505 1506 1517 1518 1521 1522 1535 3487 3488 3497 3498 3506 3509 3510 3517 3534 258 3487 3534 259 260 1507 3489 259 260 262 263 265 268 1507 1508 1511 1512 3489 3490 3491 3494 261 1509 1512 260 262 265 1509 1510 1512 1515 3491 3492 3495 260 263 268 1511 1512 1523 3493 3494 3503 218 264 265 1513 1519 1520 3498 3499 3500 3501 218 257 260 262 264 265 268 1512 1515 1516 1518 1519 3495 3496 3497 3498 266 3498 3499 257 267 268 1521 1527 1528 3506 3507 3508 3509 218 257 260 263 265 267 268 1512 1523 1524 1526 1527 3503 3504 3505 3506 269 3506 3507 270 271 1529 1533 1534 3515 3531 3532 270 271 272 1532 1533 1549 1550 3511 3512 3513 3514 3531 271 272 274 1537 1538 1540 1549 3511 3519 3520 273 3514 3531 257 272 274 276 1535 1536 1539 1540 3517 3518 3519 3522 275 1537 1540 274 276 277 1539 1540 1541 3521 3522 3523 276 277 279 1541 1542 1544 1545 3523 3524 3525 3526 3527 278 3526 3527 277 279 1545 1546 1547 3527 3528 3529 63 102 156 187 249 280 281 283 287 288 311 318 319 324 325 326 329 330 1551 1627 1628 1637 1638 1649 1650 3535 3625 3626 3629 3630 3657 3658 280 281 283 284 1551 1552 1555 1556 3535 3536 3537 3540 282 1553 1556 280 281 283 287 1553 1554 1556 1563 3537 3538 3547 281 284 285 1555 1556 1557 3539 3540 3541 284 
285 1557 1558 1560 1561 3541 3542 3543 286 3544 3545 249 280 283 287 288 1563 1564 1566 1567 3547 3548 3595 249 280 287 288 294 295 296 298 305 1567 1568 1579 1580 1583 1584 1597 3549 3550 3559 3560 3571 3572 3579 3595 3596 289 3549 3596 290 291 1569 3551 290 291 293 294 1569 1570 1573 1574 3551 3552 3553 3556 292 1571 1574 291 293 294 296 1571 1572 1574 1577 3553 3554 3557 249 288 291 293 294 296 298 299 1573 1574 1585 3555 3556 3565 249 288 295 296 1575 1581 1582 3559 3560 3561 3562 3563 249 288 293 294 295 296 1577 1578 1580 1581 3557 3558 3559 297 3560 3561 249 288 294 298 299 1583 1589 1590 3567 3568 3569 3570 3571 249 294 298 299 1585 1586 1588 1589 3565 3566 3567 300 3568 3569 301 302 1591 1595 1596 3577 3593 3594 301 302 303 1594 1595 1611 1612 3573 3574 3575 3576 3593 302 303 305 1599 1600 1602 1611 3573 3581 3582 304 3576 3593 288 303 305 307 1597 1598 1601 1602 3579 3580 3581 3584 306 1599 1602 305 307 308 1601 1602 1603 3583 3584 3585 307 308 310 1603 1604 1606 1607 3585 3586 3587 3588 3589 309 3588 3589 308 310 1607 1608 1609 3589 3590 3591 280 311 312 314 318 319 342 349 350 355 356 357 358 360 361 404 1613 1689 1690 1699 1700 1711 1712 3597 3687 3688 3691 3692 3719 3720 311 312 314 315 1613 1614 1617 1618 3597 3598 3599 3602 313 1615 1618 311 312 314 318 1615 1616 1618 1625 3599 3600 3609 312 315 316 1617 1618 1619 3601 3602 3603 315 316 1619 1620 1622 1623 3603 3604 3605 317 3606 3607 280 311 314 318 319 1625 1626 1628 1629 3609 3610 3657 280 311 318 319 324 325 326 327 329 330 336 1629 1630 1641 1642 1645 1646 1659 3611 3612 3621 3622 3633 3634 3641 3657 3658 320 3611 3658 321 322 1631 3613 321 322 324 325 1631 1632 1635 3613 3614 3615 3618 323 1633 1636 280 319 322 324 325 326 327 1633 1634 1635 1636 1639 3615 3616 3619 280 319 322 324 325 330 1635 1636 1647 3617 3618 3627 280 319 324 326 327 1637 1643 1644 3621 3622 3623 3624 3625 319 324 326 327 1639 1640 1642 1643 3619 3620 3621 328 3622 3623 280 319 329 330 1645 1651 1652 3629 3630 3631 3632 
3633 280 319 325 329 330 1647 1648 1650 1651 3627 3628 3629 331 3630 3631 332 333 1653 1657 1658 3639 3655 3656 332 333 334 1656 1657 1673 1674 3635 3636 3637 3638 3655 333 334 336 1661 1662 1664 1673 3635 3643 3644 335 3638 3655 319 334 336 338 1659 1660 1663 1664 3641 3642 3643 3646 337 1661 1664 336 338 339 1663 1664 1665 3645 3646 3647 338 339 341 1665 1666 1668 1669 3647 3648 3649 3650 340 341 3650 3651 339 340 341 1669 1670 1671 3650 3651 3652 3653 311 342 343 345 349 350 373 376 380 381 386 387 388 391 392 404 1675 1751 1752 1761 1762 1773 1774 3659 3749 3750 3753 3754 3781 3782 342 343 345 346 1675 1676 1679 1680 3659 3660 3661 3664 344 1677 1680 342 343 345 349 1677 1678 1680 1687 3661 3662 3671 343 346 347 1679 1680 1681 3663 3664 3665 346 347 1681 1682 1684 1685 3665 3666 3667 348 3668 3669 311 342 345 349 350 1687 1688 1690 1691 3671 3672 3719 311 342 349 350 355 356 357 358 360 367 1691 1692 1703 1704 1707 1708 1721 3673 3674 3683 3684 3695 3696 3703 3719 3720 351 3673 3720 352 353 1693 3675 352 353 355 356 1693 1694 1697 3675 3676 3677 3680 354 1695 1698 311 350 353 355 356 358 1695 1696 1697 1698 1701 3677 3678 3681 311 350 353 355 356 360 361 1697 1698 1709 3679 3680 3689 311 350 357 358 1699 1705 1706 3683 3684 3685 3686 3687 311 350 355 357 358 1701 1702 1704 1705 3681 3682 3683 359 3684 3685 311 350 356 360 361 1707 1713 1714 3691 3692 3693 3694 3695 311 356 360 361 1709 1710 1712 1713 3689 3690 3691 362 3692 3693 363 364 1715 1719 1720 3701 3717 3718 363 364 365 1718 1719 1735 1736 3697 3698 3699 3700 3717 364 365 367 1723 1724 1726 1735 3697 3705 3706 366 3700 3717 350 365 367 369 1721 1722 1725 1726 3703 3704 3705 3708 368 1723 1726 367 369 370 1725 1726 1727 3707 3708 3709 369 370 372 1727 1728 1730 1731 3709 3710 3711 3712 3713 371 3712 3713 370 372 1731 1732 1733 3713 3714 3715 342 373 374 376 381 404 407 411 412 418 419 420 423 1737 1813 1814 1823 1824 1835 1836 3721 3811 3812 3815 3816 3843 3844 373 374 376 377 1737 1738 1741 1742 3721 
3722 3723 3726 375 1739 1742 342 373 374 376 380 381 1739 1740 1742 1749 3723 3724 3733 374 377 378 1741 1742 1743 3725 3726 3727 377 378 1743 1744 1746 1747 3727 3728 3729 379 3730 3731 342 376 380 381 1749 1750 1752 1753 3733 3734 3781 3782 342 373 376 380 381 386 387 388 389 391 398 1753 1754 1765 1766 1769 1770 1783 3735 3736 3745 3746 3757 3758 3765 3782 382 3735 3782 383 384 1755 3737 383 384 386 387 1755 1756 1759 3737 3738 3739 3742 385 1757 1760 342 381 384 386 387 388 389 1757 1758 1759 1760 1763 3739 3740 3743 342 381 384 386 387 391 392 1759 1760 1771 3741 3742 3751 342 381 386 388 389 1761 1767 1768 3745 3746 3747 3748 3749 381 386 388 389 1763 1764 1766 1767 3743 3744 3745 390 3746 3747 342 381 387 391 392 1769 1775 1776 3754 3755 3756 3757 342 387 391 392 1771 1772 1774 1775 3751 3752 3753 3754 393 3754 3755 394 395 1777 1781 1782 3763 3779 3780 394 395 396 1780 1781 1797 1798 3759 3760 3761 3762 3779 395 396 398 1785 1786 1788 1797 3759 3767 3768 397 3762 3779 381 396 398 400 1783 1784 1787 1788 3765 3766 3767 3770 399 1785 1788 398 400 401 1787 1788 1789 3769 3770 3771 400 401 403 1789 1790 1792 1793 3771 3772 3773 3774 3775 402 3774 3775 401 403 1793 1794 1795 3775 3776 3777 311 342 373 404 405 407 435 442 443 449 450 451 453 454 497 1799 1875 1876 1885 1886 1897 1898 3783 3870 3873 3874 3877 3878 3905 3906 404 405 407 408 1799 1800 1803 1804 3783 3784 3785 3788 406 1801 1804 373 404 405 407 411 412 1801 1802 1804 1811 3785 3786 3795 405 408 409 1803 1804 1805 3787 3788 3789 408 409 1805 1806 1808 1809 3789 3790 3791 410 3792 3793 373 407 411 412 1811 1812 1814 1815 3795 3796 3843 373 407 411 412 418 419 420 422 423 429 1815 1816 1827 1828 1831 1832 1845 3797 3798 3807 3808 3816 3819 3820 3827 3843 3844 413 3797 3844 414 415 1817 3799 414 415 417 418 1817 1818 1821 3799 3800 3801 3804 416 1819 1822 415 417 418 420 1819 1820 1821 1822 1825 3801 3802 3805 373 412 415 417 418 420 423 1821 1822 1833 3803 3804 3813 373 412 419 420 1823 1829 1830 3807 
3808 3809 3810 3811 373 412 417 418 419 420 1825 1826 1828 1829 3805 3806 3807 421 3808 3809 412 422 423 1831 1837 1838 3816 3817 3818 3819 373 412 418 422 423 1833 1834 1836 1837 3813 3814 3815 3816 424 3816 3817 425 426 1839 1843 1844 3825 3841 3842 425 426 427 1842 1843 1859 1860 3821 3822 3823 3824 3841 426 427 429 1847 1848 1850 1859 3821 3829 3830 428 3824 3841 412 427 429 431 1845 1846 1849 1850 3827 3828 3829 3832 430 1847 1850 429 431 432 1849 1850 1851 3831 3832 3833 431 432 434 1851 1852 1854 1855 3833 3834 3835 3836 3837 433 3836 3837 432 434 1855 1856 1857 3837 3838 3839 404 435 436 442 443 466 473 474 479 480 481 485 497 1861 1937 1938 1947 1948 1959 1960 3845 3935 3936 3939 3940 3967 3968 435 436 438 439 442 1861 1862 1865 1866 3845 3846 3847 3850 437 1863 1866 436 438 442 1863 1864 1866 1873 3847 3848 3857 436 439 440 1865 1866 1867 3849 3850 3851 439 440 1867 1868 1870 1871 3851 3852 3853 441 3854 3855 404 435 436 438 442 443 1866 1873 1874 1876 1877 3857 3858 3905 404 435 442 443 449 451 453 454 460 497 1877 1878 1889 1890 1893 1894 1907 3859 3860 3869 3870 3881 3882 3889 3905 3906 444 3859 3906 445 446 1879 3861 445 446 448 449 451 1879 1880 1883 1884 3861 3862 3863 3866 447 1881 1884 446 448 451 1881 1882 1884 1887 3863 3864 3867 404 443 446 449 451 454 1883 1884 1895 3865 3866 3875 404 450 451 1885 1891 1892 3870 3871 3872 3873 404 443 446 448 449 450 451 1884 1887 1888 1890 1891 3867 3868 3869 3870 452 3870 3871 404 443 453 454 1893 1899 1900 3877 3878 3879 3880 3881 404 443 449 453 454 1895 1896 1898 1899 3875 3876 3877 455 3878 3879 456 457 1901 1905 1906 3887 3903 3904 456 457 458 1904 1905 1921 1922 3883 3884 3885 3886 3903 457 458 460 1909 1910 1912 1921 3883 3891 3892 459 3886 3903 443 458 460 462 1907 1908 1911 1912 3889 3890 3891 3894 461 1909 1912 460 462 463 1911 1912 1913 3893 3894 3895 462 463 465 1913 1914 1916 1917 3895 3896 3897 3898 464 465 3898 3899 463 464 465 1917 1918 1919 3898 3899 3900 3901 435 466 467 473 474 497 504 505 
512 513 515 516 1923 1999 2000 2009 2010 2021 2022 3907 3997 3998 4001 4002 4029 4030 466 467 469 470 473 1923 1924 1927 1928 3907 3908 3909 3912 468 1925 1928 467 469 473 1925 1926 1928 1935 3909 3910 3919 467 470 471 1927 1928 1929 3911 3912 3913 470 471 1929 1930 1932 1933 3913 3914 3915 472 3916 3917 435 466 467 469 473 474 1928 1935 1936 1938 1939 3919 3920 3967 435 466 473 474 479 480 481 482 484 485 491 497 1939 1940 1951 1952 1955 1956 1969 3921 3922 3931 3932 3940 3943 3944 3951 3967 3968 475 3921 3968 476 477 1941 3923 476 477 479 480 1941 1942 1945 3923 3924 3925 3928 478 1943 1946 435 474 477 479 480 481 482 1943 1944 1945 1946 1949 3925 3926 3929 435 474 477 479 480 485 1945 1946 1957 3927 3928 3937 435 474 479 481 482 1947 1953 1954 3932 3933 3934 3935 474 479 481 482 1949 1950 1952 1953 3929 3930 3931 3932 483 3932 3933 474 484 485 1955 1961 1962 3940 3941 3942 3943 435 474 480 484 485 1957 1958 1960 1961 3937 3938 3939 3940 486 3940 3941 487 488 490 1963 1967 1968 3948 3949 3965 3966 487 488 489 1966 1967 1983 1984 3945 3946 3947 3948 488 489 1971 1972 1974 1983 3945 3953 3954 487 490 3948 3965 474 491 493 1969 1970 1973 1974 3951 3952 3953 3956 492 1971 1974 491 493 494 1973 1974 1975 3955 3956 3957 493 494 496 1975 1976 1978 1979 3957 3958 3959 3960 3961 495 3960 3961 494 496 1979 1980 1981 3961 3962 3963 404 435 443 466 474 497 498 504 535 536 541 542 543 546 547 714 776 869 1985 2061 2062 2071 2072 2083 2084 3969 4059 4060 4063 4064 4091 4092 497 498 500 501 504 1985 1986 1989 1990 3969 3970 3971 3974 499 1987 1990 498 500 504 1987 1988 1990 1997 3971 3972 3981 498 501 502 1989 1990 1991 3973 3974 3975 501 502 1991 1992 1994 1995 3975 3976 3977 503 3978 3979 466 497 498 500 504 505 1990 1997 1998 2000 2001 3981 3982 4029 4030 466 504 505 512 513 515 516 522 2001 2002 2013 2014 2017 2018 2031 3983 3984 3993 3994 4005 4006 4013 4030 506 3983 4030 507 508 2003 3985 507 508 510 511 2003 2004 2007 3985 3986 3987 3990 509 2005 2008 508 510 511 513 
2005 2006 2007 2008 2011 3987 3988 3991 508 510 511 513 516 2007 2008 2019 3989 3990 3999 466 505 512 513 2009 2015 2016 3993 3994 3995 3996 3997 466 505 510 511 512 513 516 2011 2012 2014 2015 3991 3992 3993 514 3994 3995 466 505 515 516 2017 2023 2024 4002 4003 4004 4005 466 505 511 513 515 516 2019 2020 2022 2023 3999 4000 4001 4002 517 4002 4003 518 519 2025 2029 2030 4011 4027 4028 518 519 520 2028 2029 2045 2046 4007 4008 4009 4010 4027 519 520 522 2033 2034 2036 2045 4007 4015 4016 521 4010 4027 505 520 522 524 2031 2032 2035 2036 4013 4014 4015 4018 523 2033 2036 522 524 525 2035 2036 2037 4017 4018 4019 524 525 527 2037 2038 2040 2041 4019 4020 4021 4022 4023 526 4022 4023 525 527 2041 2042 2043 4023 4024 4025 528 529 531 535 559 566 567 573 574 577 578 2047 2123 2124 2133 2134 2145 2146 4031 4121 4122 4125 4126 4153 4154 528 529 531 532 2047 2048 2051 2052 4031 4032 4033 4036 530 2049 2052 528 529 531 535 2049 2050 2052 2059 4033 4034 4043 529 532 533 2051 2052 2053 4035 4036 4037 532 533 2053 2054 2056 2057 4037 4038 4039 534 4040 4041 497 528 531 535 536 559 590 621 652 655 686 714 2059 2060 2062 2063 4043 4044 4091 4092 497 535 536 541 542 543 544 546 553 714 776 869 2063 2064 2075 2076 2079 2080 2093 4045 4046 4055 4056 4067 4068 4075 4092 537 4045 4092 538 539 2065 4047 538 539 541 542 2065 2066 2069 2070 4047 4048 4049 4052 540 2067 2070 497 536 539 541 542 543 544 2067 2068 2070 2073 4049 4050 4053 497 536 539 541 542 546 547 2069 2070 2081 4051 4052 4061 497 536 541 543 544 2071 2077 2078 4056 4057 4058 4059 536 541 543 544 2073 2074 2076 2077 4053 4054 4055 4056 545 4056 4057 497 536 542 546 547 2079 2085 2086 4064 4065 4066 4067 497 542 546 547 2081 2082 2084 2085 4061 4062 4063 4064 548 4064 4065 549 550 2087 2091 2092 4073 4089 4090 549 550 551 2090 2091 2107 2108 4069 4070 4071 4072 4089 550 551 553 2095 2096 2098 2107 4069 4077 4078 552 4072 4089 536 551 553 555 2093 2094 2097 2098 4075 4076 4077 4080 554 2095 2098 553 555 556 2097 2098 2099 
4079 4080 4081 555 556 558 2099 2100 2102 2103 4081 4082 4083 4084 4085 557 4084 4085 556 558 2103 2104 2105 4085 4086 4087 528 535 559 560 566 567 590 597 598 604 605 606 608 609 2109 2185 2186 2195 2196 2207 2208 4093 4180 4183 4184 4187 4188 4215 4216 559 560 562 563 566 2109 2110 2113 2114 4093 4094 4095 4098 561 2111 2114 560 562 566 2111 2112 2114 2121 4095 4096 4105 560 563 564 2113 2114 2115 4097 4098 4099 563 564 2115 2116 2118 2119 4099 4100 4101 565 4102 4103 528 559 560 562 566 567 2114 2121 2122 2124 2125 4105 4106 4153 528 559 566 567 572 573 574 575 577 584 2125 2126 2137 2138 2141 2142 2155 4107 4108 4117 4118 4129 4130 4137 4153 4154 568 4107 4154 569 570 2127 4109 569 570 572 573 2127 2128 2131 4109 4110 4111 4114 571 2129 2132 567 570 572 573 574 575 2129 2130 2131 2132 2135 4111 4112 4115 528 567 570 572 573 574 577 578 2131 2132 2143 4113 4114 4123 528 567 572 573 574 575 2133 2139 2140 4117 4118 4119 4120 4121 567 572 574 575 2135 2136 2138 2139 4115 4116 4117 576 4118 4119 528 567 573 577 578 2141 2147 2148 4125 4126 4127 4128 4129 528 573 577 578 2143 2144 2146 2147 4123 4124 4125 579 4126 4127 580 581 2149 2153 2154 4135 4151 4152 580 581 582 2152 2153 2169 2170 4131 4132 4133 4134 4151 581 582 584 2157 2158 2160 2169 4131 4139 4140 583 4134 4151 567 582 584 586 2155 2156 2159 2160 4137 4138 4139 4142 585 2157 2160 584 586 587 2159 2160 2161 4141 4142 4143 586 587 589 2161 2162 2164 2165 4143 4144 4145 4146 4147 588 4146 4147 587 589 2165 2166 2167 4147 4148 4149 535 559 590 591 597 598 621 628 629 636 637 639 640 2171 2247 2248 2257 2258 2269 2270 4155 4245 4246 4249 4250 4277 4278 590 591 593 594 597 2171 2172 2175 2176 4155 4156 4157 4160 592 2173 2176 591 593 597 2173 2174 2176 2183 4157 4158 4167 591 594 595 2175 2176 2177 4159 4160 4161 594 595 2177 2178 2180 2181 4161 4162 4163 596 4164 4165 559 590 591 593 597 598 2183 2184 2186 2187 4167 4168 4215 4216 559 590 597 598 604 606 608 609 615 2187 2188 2199 2200 2203 2204 2217 4169 4170 
4179 4180 4191 4192 4199 4216 599 4169 4216 600 601 2189 4171 600 601 603 604 2189 2190 2193 4171 4172 4173 4176 602 2191 2194 601 603 604 606 2191 2192 2193 2194 2197 4173 4174 4177 559 598 601 603 604 606 609 2193 2194 2205 4175 4176 4185 559 605 606 2195 2201 2202 4180 4181 4182 4183 559 598 603 604 605 606 2197 2198 2200 2201 4177 4178 4179 4180 607 4180 4181 559 598 608 609 2203 2209 2210 4188 4189 4190 4191 559 598 604 608 609 2205 2206 2208 2209 4185 4186 4187 4188 610 4188 4189 611 612 2211 2215 2216 4197 4213 4214 611 612 613 614 2214 2215 2231 2232 4193 4194 4195 4213 612 613 615 2219 2220 2222 2231 4193 4201 4202 612 614 4195 4196 4213 598 613 615 617 2217 2218 2221 2222 4199 4200 4201 4204 616 2219 2222 615 617 618 2221 2222 2223 4203 4204 4205 617 618 619 620 2223 2224 2226 2227 4205 4206 4207 4209 618 619 4207 4208 4209 618 620 2227 2228 2229 4209 4210 4211 535 590 621 622 628 629 655 659 660 667 668 670 671 2233 2309 2310 2319 2320 2331 2332 4217 4307 4308 4311 4312 4339 4340 621 622 624 625 628 2233 2234 2237 4217 4218 4219 4222 623 2235 2238 622 624 625 628 2235 2236 2237 2238 2245 4219 4220 4229 622 624 625 626 2237 2238 2239 4221 4222 4223 625 626 2239 2240 2242 2243 4223 4224 4225 627 4226 4227 590 621 622 624 628 629 2245 2246 2248 2249 4229 4230 4277 4278 590 621 628 629 636 637 639 646 2249 2250 2261 2262 2265 2266 2279 4231 4232 4241 4242 4253 4254 4261 4278 630 4231 4278 631 2251 4233 632 634 635 2251 2252 2255 4233 4234 4235 4238 633 2253 2256 632 634 635 637 640 2253 2254 2255 2256 2259 4235 4236 4239 632 634 635 640 2255 2256 2267 4237 4238 4247 590 629 636 637 638 2257 2263 2264 4241 4242 4243 4244 4245 590 629 634 636 637 639 640 2259 2260 2262 2263 4239 4240 4241 636 638 4242 4243 590 629 637 639 640 641 2265 2271 2272 4250 4251 4252 4253 590 634 635 637 639 640 2267 2268 2270 2271 4247 4248 4249 4250 639 641 4250 4251 642 645 2273 2277 2278 4258 4259 4275 4276 643 644 2276 2277 2293 2294 4255 4256 4257 4258 643 644 646 2281 2282 2284 
2293 4255 4263 4264 642 645 4258 4275 629 644 646 648 2279 2280 2283 2284 4261 4262 4263 4266 647 2281 2284 646 648 649 2283 2284 2285 4265 4266 4267 648 649 650 2285 2286 2288 2289 4267 4268 4269 4271 649 650 4269 4270 4271 651 2289 2290 2291 4271 4272 4273 535 652 653 655 686 690 691 698 701 702 2295 2371 2372 2381 2382 2393 2394 4279 4369 4370 4373 4374 4401 4402 652 653 655 656 2295 2296 2299 4279 4280 4281 4284 654 2297 2300 535 621 652 653 655 656 659 660 2297 2298 2299 2300 2307 4281 4282 4291 653 655 656 657 2299 2300 2301 4283 4284 4285 656 657 2301 2302 2304 2305 4285 4286 4287 658 4288 4289 621 655 659 660 2307 2308 2310 2311 4291 4292 4339 621 655 659 660 667 668 670 677 2311 2312 2323 2324 2327 2328 2341 4293 4294 4303 4304 4315 4316 4323 4339 4340 661 4293 4340 662 2313 4295 663 665 666 2313 2314 2317 4295 4296 4297 4300 664 2315 2318 663 665 666 668 671 2315 2316 2317 2318 2321 4297 4298 4301 663 665 666 671 2317 2318 2329 4299 4300 4309 621 660 667 668 669 2319 2325 2326 4303 4304 4305 4306 4307 621 660 665 667 668 670 671 2321 2322 2324 2325 4301 4302 4303 667 669 4304 4305 621 660 668 670 671 672 2327 2333 2334 4312 4313 4314 4315 621 665 666 668 670 671 2329 2330 2332 2333 4309 4310 4311 4312 670 672 4312 4313 673 674 2335 2339 2340 4321 4337 4338 673 674 675 676 2338 2339 2355 2356 4317 4318 4319 4337 674 675 677 2343 2344 2346 2355 4317 4325 4326 674 676 4319 4320 4337 660 675 677 679 2341 2342 2345 2346 4323 4324 4325 4328 678 2343 2346 677 679 680 2345 2346 2347 4327 4328 4329 679 680 681 682 2347 2348 2350 2351 4329 4330 4331 4333 680 681 4331 4332 4333 680 682 2351 2352 2353 4333 4334 4335 683 684 686 717 721 722 729 730 732 733 2357 2433 2434 2443 2444 2455 2456 4341 4431 4432 4435 4436 4463 4464 683 684 686 687 2357 2358 2361 4341 4342 4343 4346 685 2359 2362 535 652 683 684 686 687 690 691 714 715 717 2359 2360 2361 2362 2369 4343 4344 4353 684 686 687 2361 2362 2363 4345 4346 4347 688 2363 2364 2366 2367 4347 4348 4349 689 4350 4351 652 
686 690 691 2369 2370 2372 2373 4353 4354 4401 652 686 690 691 698 699 701 702 708 2373 2374 2385 2386 2389 2390 2403 4355 4356 4365 4366 4377 4378 4385 4401 4402 692 4355 4402 693 2375 4357 694 696 697 699 702 2375 2376 2379 2380 4357 4358 4359 4362 695 2377 2380 694 696 699 2377 2378 2380 2383 4359 4360 4363 694 697 702 2379 2380 2391 4361 4362 4371 652 691 698 699 700 702 2381 2387 2388 4366 4367 4368 4369 691 694 696 698 699 702 2380 2383 2384 2386 2387 4363 4364 4365 4366 698 700 4366 4367 652 691 701 702 703 2389 2395 2396 4374 4375 4376 4377 652 691 694 697 698 699 701 702 2380 2391 2392 2394 2395 4371 4372 4373 4374 701 703 4374 4375 704 2397 2401 2402 4383 4399 4400 705 706 707 2400 2401 2417 2418 4379 4380 4381 4399 705 706 708 2405 2406 2408 2417 4379 4387 4388 705 707 4381 4382 4399 691 706 708 710 2403 2404 2407 2408 4385 4386 4387 4390 709 2405 2408 708 710 711 2407 2408 2409 4389 4390 4391 710 711 2409 2410 2412 2413 4391 4392 4393 4394 712 713 4394 4395 712 713 2413 2414 2415 4394 4395 4396 4397 497 535 536 686 714 715 745 748 752 753 760 761 763 764 776 2419 2495 2496 2505 2506 2517 2518 4403 4493 4494 4497 4498 4525 4526 686 714 715 717 718 2419 2420 2423 4403 4404 4405 4408 716 2421 2424 683 686 715 717 718 721 722 2421 2422 2423 2424 2431 4405 4406 4415 715 717 718 719 2423 2424 2425 4407 4408 4409 718 719 2425 2426 2428 2429 4409 4410 4411 720 4412 4413 683 717 721 722 2431 2432 2434 2435 4415 4416 4463 4464 683 717 721 722 729 730 732 733 739 2435 2436 2447 2448 2451 2452 2465 4417 4418 4427 4428 4439 4440 4447 4464 723 4417 4464 724 2437 4419 725 727 728 2437 2438 2441 2442 4419 4420 4421 4424 726 2439 2442 725 727 728 730 733 2439 2440 2442 2445 4421 4422 4425 725 727 728 733 2441 2442 2453 4423 4424 4433 683 722 729 730 731 2443 2449 2450 4427 4428 4429 4430 4431 683 722 727 729 730 733 2445 2446 2448 2449 4425 4426 4427 729 731 4428 4429 683 722 732 733 734 2451 2457 2458 4436 4437 4438 4439 683 722 727 728 730 732 733 2453 2454 2456 2457 
4433 4434 4435 4436 732 734 4436 4437 735 736 2459 2463 2464 4445 4461 4462 735 736 737 738 2462 2463 2479 2480 4441 4442 4443 4461 736 737 739 2467 2468 2470 2479 4441 4449 4450 736 738 4443 4444 4461 722 737 739 741 2465 2466 2469 2470 4447 4448 4449 4452 740 2467 2470 739 741 742 2469 2470 2471 4451 4452 4453 741 742 743 744 2471 2472 2474 2475 4453 4454 4455 4457 742 743 4455 4456 4457 742 744 2475 2476 2477 4457 4458 4459 714 745 746 748 753 776 783 784 791 792 795 2481 2557 2558 2567 2568 2579 2580 4465 4552 4555 4556 4559 4560 4587 4588 745 746 748 749 2481 2482 2485 4465 4466 4467 4470 747 2483 2486 714 745 746 748 749 752 753 2483 2484 2485 2486 2493 4467 4468 4477 746 748 749 750 2485 2486 2487 4469 4470 4471 749 750 2487 2488 2490 2491 4471 4472 4473 751 4474 4475 714 748 752 753 2493 2494 2496 2497 4477 4478 4525 714 745 748 752 753 760 761 763 770 2497 2498 2509 2510 2513 2514 2527 4479 4480 4489 4490 4501 4502 4509 4525 4526 754 4479 4526 755 2499 4481 756 758 759 2499 2500 2503 4481 4482 4483 4486 757 2501 2504 756 758 759 761 764 2501 2502 2503 2504 2507 4483 4484 4487 756 758 759 764 2503 2504 2515 4485 4486 4495 714 753 760 761 762 2505 2511 2512 4490 4491 4492 4493 714 753 758 760 761 763 764 2507 2508 2510 2511 4487 4488 4489 4490 760 762 4490 4491 714 753 761 763 764 2513 2519 2520 4497 4498 4499 4500 4501 714 758 759 761 763 764 2515 2516 2518 2519 4495 4496 4497 765 4498 4499 766 2521 2525 2526 4507 4523 4524 767 769 2524 2525 2541 2542 4503 4504 4505 4523 768 770 2529 2530 2532 2541 4503 4511 4512 767 769 4505 4506 4523 753 768 770 772 2527 2528 2531 2532 4509 4510 4511 4514 771 2529 2532 770 772 773 2531 2532 2533 4513 4514 4515 772 773 774 2533 2534 2536 2537 4515 4516 4517 4519 773 774 4517 4518 4519 775 2537 2538 2539 4519 4520 4521 497 536 714 745 776 777 783 784 807 814 815 822 823 826 869 2543 2619 2620 2629 2630 2641 2642 4527 4614 4617 4618 4621 4622 4649 4650 776 777 779 780 783 2543 2544 2547 2548 4527 4528 4529 4532 778 2545 2548 
777 779 783 2545 2546 2548 2555 4529 4530 4539 777 780 781 2547 2548 2549 4531 4532 4533 780 781 2549 2550 2552 2553 4533 4534 4535 782 4536 4537 745 776 777 779 783 784 2548 2555 2556 2558 2559 4539 4540 4587 4588 745 776 783 784 792 794 795 801 2559 2560 2571 2572 2575 2576 2589 4541 4542 4551 4552 4560 4563 4564 4571 4588 785 4541 4588 786 2561 4543 787 789 790 2561 2562 2565 4543 4544 4545 4548 788 2563 2566 787 789 790 792 795 2563 2564 2565 2566 2569 4545 4546 4549 787 789 790 795 2565 2566 2577 4547 4548 4557 745 791 792 793 2567 2573 2574 4552 4553 4554 4555 745 784 789 791 792 795 2569 2570 2572 2573 4549 4550 4551 4552 791 793 4552 4553 784 794 795 796 2575 2581 2582 4560 4561 4562 4563 745 784 789 790 792 794 795 2577 2578 2580 2581 4557 4558 4559 4560 794 796 4560 4561 797 798 800 2583 2587 2588 4567 4568 4569 4585 4586 797 798 799 2586 2587 2603 2604 4565 4566 4567 798 799 801 2591 2592 2594 2603 4565 4573 4574 797 800 4568 4585 784 799 801 803 2589 2590 2593 2594 4571 4572 4573 4576 802 2591 2594 801 803 804 2593 2594 2595 4575 4576 4577 803 804 805 806 2595 2596 2598 2599 4577 4578 4579 804 805 806 4579 4580 4581 804 805 806 2599 2600 2601 4581 4582 4583 776 807 808 814 815 838 845 846 852 853 854 857 869 2605 2681 2682 2691 2692 2703 2704 4589 4676 4679 4680 4683 4684 4711 4712 807 808 810 811 814 2605 2606 2609 2610 4589 4590 4591 4594 809 2607 2610 808 810 814 2607 2608 2610 2617 4591 4592 4601 808 811 812 2609 2610 2611 4593 4594 4595 811 812 2611 2612 2614 2615 4595 4596 4597 813 4598 4599 776 807 808 810 814 815 2610 2617 2618 2620 2621 4601 4602 4649 4650 776 807 814 815 823 825 826 832 2621 2622 2633 2634 2637 2638 2651 4603 4604 4613 4614 4622 4625 4626 4633 4650 816 4603 4650 817 2623 4605 818 820 821 2623 2624 2627 4605 4606 4607 4610 819 2625 2628 818 820 821 823 826 2625 2626 2627 2628 2631 4607 4608 4611 818 820 821 826 2627 2628 2639 4609 4610 4619 776 822 823 2629 2635 2636 4614 4615 4616 4617 776 815 820 822 823 826 2631 2632 2634 
2635 4611 4612 4613 4614 824 4614 4615 815 825 826 2637 2643 2644 4622 4623 4624 4625 776 815 820 821 823 825 826 2639 2640 2642 2643 4619 4620 4621 4622 827 4622 4623 828 829 831 2645 2649 2650 4631 4647 4648 828 829 830 831 2648 2649 2665 2666 4627 4628 4629 829 830 832 2653 2654 2656 2665 4627 4635 4636 828 829 831 4629 4630 4647 815 830 832 834 2651 2652 2655 2656 4633 4634 4635 4638 833 2653 2656 832 834 835 2655 2656 2657 4637 4638 4639 834 835 836 837 2657 2658 2660 2661 4639 4640 4641 835 836 837 4641 4642 4643 835 836 837 2661 2662 2663 4643 4644 4645 807 838 839 845 846 869 876 877 884 885 887 888 2667 2743 2744 2753 2754 2765 2766 4651 4738 4741 4742 4745 4746 4773 4774 838 839 841 842 845 2667 2668 2671 2672 4651 4652 4653 4656 840 2669 2672 839 841 845 2669 2670 2672 2679 4653 4654 4663 839 842 843 2671 2672 2673 4655 4656 4657 842 843 2673 2674 2676 2677 4657 4658 4659 844 4660 4661 807 838 839 841 845 846 2672 2679 2680 2682 2683 4663 4664 4711 4712 807 838 845 846 847 852 854 856 857 863 869 2683 2684 2695 2696 2699 2700 2713 4665 4666 4675 4676 4684 4687 4688 4695 4712 846 847 4665 4712 848 849 2685 4667 848 849 851 852 854 2685 2686 2689 2690 4667 4668 4669 4672 850 2687 2690 849 851 854 2687 2688 2690 2693 4669 4670 4673 807 846 849 852 854 857 2689 2690 2701 4671 4672 4681 807 853 854 2691 2697 2698 4676 4677 4678 4679 807 846 849 851 852 853 854 2690 2693 2694 2696 2697 4673 4674 4675 4676 855 4676 4677 846 856 857 858 2699 2705 2706 4684 4685 4686 4687 807 846 852 856 857 2701 2702 2704 2705 4681 4682 4683 4684 856 858 4684 4685 859 860 862 2707 2711 2712 4693 4709 4710 859 860 861 862 2710 2711 2727 2728 4689 4690 4691 860 861 863 2715 2716 2718 2727 4689 4697 4698 859 860 862 4691 4692 4709 846 861 863 865 2713 2714 2717 2718 4695 4696 4697 4700 864 2715 2718 863 865 866 2717 2718 2719 4699 4700 4701 865 866 868 2719 2720 2722 2723 4701 4702 4703 867 868 4704 4705 866 867 868 2723 2724 2725 4703 4704 4705 4706 4707 497 536 776 807 838 846 
869 870 872 876 900 907 908 915 916 919 2729 2805 2806 2815 2816 2827 2828 4713 4800 4803 4804 4807 4808 4835 4836 869 870 872 873 2729 2730 2733 2734 4713 4714 4715 4718 871 2731 2734 869 870 872 876 2731 2732 2734 2741 4715 4716 4725 870 873 874 2733 2734 2735 4717 4718 4719 873 874 2735 2736 2738 2739 4719 4720 4721 875 4722 4723 838 869 872 876 877 2741 2742 2744 2745 4725 4726 4773 4774 838 876 877 885 887 888 894 2745 2746 2757 2758 2761 2762 2775 4727 4728 4737 4738 4749 4750 4757 4774 878 4727 4774 879 880 2747 4729 879 880 882 883 2747 2748 2751 4729 4730 4731 4734 881 2749 2752 880 882 883 885 2749 2750 2751 2752 2755 4731 4732 4735 880 882 883 885 888 2751 2752 2763 4733 4734 4743 838 884 885 2753 2759 2760 4738 4739 4740 4741 838 877 882 883 884 885 888 2755 2756 2758 2759 4735 4736 4737 4738 886 4738 4739 838 877 887 888 2761 2767 2768 4746 4747 4748 4749 838 877 883 885 887 888 2763 2764 2766 2767 4743 4744 4745 4746 889 4746 4747 890 891 893 2769 2773 2774 4755 4771 4772 890 891 892 893 2772 2773 2789 2790 4751 4752 4753 891 892 894 2777 2778 2780 2789 4751 4759 4760 890 891 893 4753 4754 4771 877 892 894 896 2775 2776 2779 2780 4757 4758 4759 4762 895 2777 2780 894 896 897 2779 2780 2781 4761 4762 4763 896 897 898 899 2781 2782 2784 2785 4763 4764 4765 897 898 899 4765 4766 4767 897 898 899 2785 2786 2787 4767 4768 4769 869 900 901 907 908 934 938 939 946 947 950 2791 2867 2868 2877 2878 2889 2890 4775 4862 4865 4866 4869 4870 4897 4898 900 901 903 904 907 2791 2792 2795 2796 4775 4776 4777 4780 902 2793 2796 901 903 907 2793 2794 2796 2803 4777 4778 4787 901 904 905 2795 2796 2797 4779 4780 4781 904 905 2797 2798 2800 2801 4781 4782 4783 906 4784 4785 869 900 901 903 907 908 2796 2803 2804 2806 2807 4787 4788 4835 869 900 907 908 909 916 918 919 925 2807 2808 2819 2820 2823 2824 2837 4789 4790 4799 4800 4808 4811 4812 4819 4835 4836 908 909 4789 4836 910 911 2809 4791 910 911 913 914 2809 2810 2813 4791 4792 4793 4796 912 2811 2814 911 913 914 916 
2811 2812 2813 2814 2817 4793 4794 4797 911 913 914 916 919 2813 2814 2825 4795 4796 4805 869 915 916 2815 2821 2822 4800 4801 4802 4803 869 908 913 914 915 916 919 2817 2818 2820 2821 4797 4798 4799 4800 917 4800 4801 908 918 919 920 2823 2829 2830 4808 4809 4810 4811 869 908 914 916 918 919 2825 2826 2828 2829 4805 4806 4807 4808 918 920 4808 4809 921 922 924 2831 2835 2836 4817 4833 4834 921 922 923 924 2834 2835 2851 2852 4813 4814 4815 922 923 925 2839 2840 2842 2851 4813 4821 4822 921 922 924 4815 4816 4833 908 923 925 927 2837 2838 2841 2842 4819 4820 4821 4824 926 2839 2842 925 927 928 2841 2842 2843 4823 4824 4825 927 928 930 2843 2844 2846 2847 4825 4826 4827 929 930 4828 4829 928 929 930 2847 2848 2849 4827 4828 4829 4830 4831 931 932 969 970 975 977 980 981 2853 2929 2930 2939 2940 2951 2952 4837 4927 4928 4931 4932 4959 4960 931 932 934 935 2853 2854 2857 2858 4837 4838 4839 4842 933 2855 2858 900 932 934 938 939 2855 2856 2858 2865 4839 4840 4849 932 935 936 2857 2858 2859 4841 4842 4843 935 936 2859 2860 2862 2863 4843 4844 4845 937 4846 4847 900 934 938 939 2865 2866 2868 2869 4849 4850 4897 900 934 938 939 940 947 949 950 956 2869 2870 2881 2882 2885 2886 2899 4851 4852 4861 4862 4870 4873 4874 4881 4897 4898 939 940 4851 4898 941 942 2871 4853 941 942 944 945 2871 2872 2875 4853 4854 4855 4858 943 2873 2876 942 944 945 947 950 2873 2874 2875 2876 2879 4855 4856 4859 942 944 945 950 2875 2876 2887 4857 4858 4867 900 946 947 2877 2883 2884 4862 4863 4864 4865 900 939 944 946 947 950 2879 2880 2882 2883 4859 4860 4861 4862 948 4862 4863 939 949 950 951 2885 2891 2892 4870 4871 4872 4873 900 939 944 945 947 949 950 2887 2888 2890 2891 4867 4868 4869 4870 949 951 4870 4871 952 953 955 2893 2897 2898 4879 4895 4896 952 953 954 955 2896 2897 2913 2914 4875 4876 4877 953 954 956 2901 2902 2904 2913 4875 4883 4884 952 953 955 4877 4878 4895 939 954 956 958 2899 2900 2903 2904 4881 4882 4883 4886 957 2901 2904 956 958 959 2903 2904 2905 4885 4886 4887 958 
959 961 2905 2906 2908 2909 4887 4888 4889 960 961 4890 4891 959 960 961 2909 2910 2911 4889 4890 4891 4892 4893 962 2915 4899 963 965 966 2915 2916 2919 4899 4900 4901 4904 964 2917 2920 963 965 966 969 2917 2918 2919 2920 2927 4901 4902 4911 963 965 966 967 2919 2920 2921 4903 4904 4905 966 967 2921 2922 2924 2925 4905 4906 4907 968 4908 4909 931 965 969 970 2927 2928 2930 2931 4911 4912 4959 931 969 970 975 977 978 980 981 987 2931 2932 2943 2944 2947 2948 2961 4913 4914 4923 4924 4935 4936 4943 4959 4960 971 4913 4960 972 2933 4915 973 975 976 2933 2934 2937 4915 4916 4917 4920 974 2935 2938 931 970 973 975 976 977 978 981 2935 2936 2937 2938 2941 4917 4918 4921 973 975 976 981 2937 2938 2949 4919 4920 4929 931 970 975 977 978 979 2939 2945 2946 4923 4924 4925 4926 4927 970 975 977 978 2941 2942 2944 2945 4921 4922 4923 977 979 4924 4925 931 970 980 981 982 2947 2953 2954 4931 4932 4933 4934 4935 931 970 975 976 980 981 2949 2950 2952 2953 4929 4930 4931 980 982 4932 4933 983 984 986 2955 2959 2960 4939 4940 4941 4957 4958 983 984 985 2958 2959 2975 2976 4937 4938 4939 984 985 987 2963 2964 2966 2975 4937 4945 4946 983 986 4940 4957 970 985 987 989 2961 2962 2965 2966 4943 4944 4945 4948 988 2963 2966 987 989 990 2965 2966 2967 4947 4948 4949 989 990 991 992 2967 2968 2970 2971 4949 4950 4951 990 991 992 4951 4952 4953 990 991 992 2971 2972 2973 4953 4954 4955 1 2 993 994 2 993 994 3 4 995 996 998 4 995 996 2 5 997 998 2 3 4 5 995 997 998 5 6 999 1000 2983 6 999 1000 1001 1002 6 1001 1002 6 1003 1004 1003 1004 4 8 1005 1006 8 1005 1006 1007 1008 8 1007 1008 8 9 1009 1010 9 1009 1010 11 12 1011 1012 2993 12 1011 1012 13 14 1013 1014 1016 14 1013 1014 12 14 15 1015 1016 13 14 15 1013 1015 1016 16 1017 1018 1017 1018 14 17 1019 1020 17 1019 1020 9 1021 1022 9 17 1021 1022 16 17 1023 1024 16 1023 1024 9 19 1025 1026 9 1025 1026 15 20 1027 1028 20 1027 1028 1029 1030 20 1029 1030 19 20 1031 1032 19 1031 1032 22 1033 1034 1033 1034 1035 1036 23 1035 1036 22 23 1037 
1038 22 1037 1038 9 26 1039 1040 26 1039 1040 24 27 1041 1042 1044 24 1041 1042 26 28 1043 1044 24 26 27 28 1041 1043 1044 28 29 1045 1046 29 1045 1046 1047 1048 29 1047 1048 29 31 1049 1050 31 1049 1050 31 1051 1052 1051 1052 23 24 1053 1054 23 1053 1054 32 33 1055 1056 33 1055 1056 34 35 1057 1058 1060 35 1057 1058 33 35 36 1059 1060 34 35 36 1057 1059 1060 36 37 1061 1062 37 1061 1062 1063 1064 37 1063 1064 37 1065 1066 1065 1066 35 39 1067 1068 39 1067 1068 1 1069 1070 1 39 1069 1070 39 40 1071 1072 40 1071 1072 42 43 1073 1074 43 1073 1074 44 45 1075 1076 1078 45 1075 1076 43 46 1077 1078 43 44 45 46 48 1075 1077 1078 1 47 1079 1080 1 1079 1080 45 48 1081 1082 48 1081 1082 40 1083 1084 40 48 1083 1084 47 48 1085 1086 47 1085 1086 40 50 1087 1088 40 1087 1088 46 51 1089 1090 51 1089 1090 1 1091 1092 1 51 1091 1092 50 51 1093 1094 50 1093 1094 53 1095 1096 1095 1096 1097 1098 54 1097 1098 53 54 1099 1100 53 1099 1100 40 57 1101 1102 57 1101 1102 55 58 1103 1104 1106 55 1103 1104 57 59 1105 1106 55 57 58 59 1103 1105 1106 59 60 1107 1108 60 1107 1108 1109 1110 60 1109 1110 60 62 1111 1112 62 1111 1112 62 1113 1114 1113 1114 54 55 1115 1116 54 1115 1116 63 64 1117 1118 64 1117 1118 65 66 1119 1120 1122 66 1119 1120 64 67 1121 1122 64 65 66 67 70 1119 1121 1122 67 68 1123 1124 68 1123 1124 1125 1126 68 1125 1126 68 1127 1128 1127 1128 66 70 1129 1130 70 1129 1130 32 1131 1132 32 70 1131 1132 70 71 1133 1134 71 1133 1134 73 74 1135 1136 74 1135 1136 75 76 1137 1138 1140 76 1137 1138 74 77 1139 1140 74 75 76 77 1137 1139 1140 32 78 1141 1142 32 1141 1142 76 79 1143 1144 79 1143 1144 71 1145 1146 71 79 1145 1146 78 79 1147 1148 78 1147 1148 71 81 1149 1150 71 1149 1150 77 82 1151 1152 82 1151 1152 32 1153 1154 32 82 1153 1154 81 82 1155 1156 81 1155 1156 84 1157 1158 1157 1158 1159 1160 85 1159 1160 84 85 1161 1162 84 1161 1162 71 88 1163 1164 88 1163 1164 86 89 1165 1166 1168 86 1165 1166 88 90 1167 1168 86 88 89 90 1165 1167 1168 90 91 1169 1170 91 1169 1170 1171 
1172 91 1171 1172 91 93 1173 1174 93 1173 1174 93 1175 1176 1175 1176 85 86 1177 1178 85 1177 1178 94 95 1179 1180 95 1179 1180 96 97 1181 1182 1184 97 1181 1182 95 98 1183 1184 95 96 97 98 1181 1183 1184 98 99 1185 1186 99 1185 1186 1187 1188 99 1187 1188 99 1189 1190 1189 1190 97 101 1191 1192 101 1191 1192 63 1193 1194 63 101 1193 1194 101 102 1195 1196 102 1195 1196 104 105 1197 1198 105 1197 1198 106 107 1199 1200 1202 107 1199 1200 105 108 1201 1202 105 106 107 108 110 113 1199 1201 1202 63 109 1203 1204 63 1203 1204 107 110 1205 1206 110 1205 1206 102 1207 1208 102 110 1207 1208 109 110 1209 1210 109 1209 1210 102 112 1211 1212 102 1211 1212 108 113 1213 1214 113 1213 1214 63 1215 1216 63 113 1215 1216 112 113 1217 1218 112 1217 1218 115 1219 1220 1219 1220 1221 1222 116 1221 1222 115 116 1223 1224 115 1223 1224 102 119 1225 1226 119 1225 1226 117 120 1227 1228 117 1227 1228 119 121 1229 1230 117 119 120 121 1229 1230 121 122 1231 1232 122 1231 1232 1233 1234 122 1233 1234 122 124 1235 1236 124 1235 1236 124 1237 1238 1237 1238 116 117 1239 1240 116 1239 1240 125 126 1241 1242 126 1241 1242 127 128 1243 1244 128 1243 1244 126 129 1245 1246 126 127 128 129 1245 1246 129 130 1247 1248 130 1247 1248 1249 1250 130 1249 1250 130 1251 1252 1251 1252 128 132 1253 1254 132 1253 1254 94 1255 1256 94 132 1255 1256 132 133 1257 1258 133 1257 1258 135 136 1259 1260 3241 136 1259 1260 137 138 1261 1262 1264 138 1261 1262 136 138 139 1263 1264 137 138 139 1261 1263 1264 94 140 1265 1266 94 1265 1266 138 141 1267 1268 141 1267 1268 133 1269 1270 133 141 1269 1270 140 141 1271 1272 140 1271 1272 133 143 1273 1274 133 1273 1274 139 144 1275 1276 144 1275 1276 94 1277 1278 94 144 1277 1278 143 144 1279 1280 143 1279 1280 146 1281 1282 1281 1282 1283 1284 147 1283 1284 146 147 1285 1286 146 1285 1286 133 150 1287 1288 150 1287 1288 148 150 151 1289 1290 148 1289 1290 150 152 1291 1292 150 151 152 1291 1292 152 153 1293 1294 153 1293 1294 1295 1296 153 1295 1296 153 155 1297 
1298 155 1297 1298 155 1299 1300 1299 1300 147 148 1301 1302 147 1301 1302 156 157 1303 1304 157 1303 1304 158 159 1305 1306 159 1305 1306 157 159 160 1307 1308 158 159 160 1307 1308 160 161 1309 1310 161 1309 1310 1311 1312 161 1311 1312 161 1313 1314 1313 1314 159 163 1315 1316 163 1315 1316 125 1317 1318 125 163 1317 1318 163 164 1319 1320 164 1319 1320 166 167 1321 1322 3303 167 1321 1322 168 169 1323 1324 169 1323 1324 167 170 1325 1326 167 168 169 170 1325 1326 125 171 1327 1328 125 1327 1328 169 172 1329 1330 172 1329 1330 164 1331 1332 164 172 1331 1332 171 172 1333 1334 171 1333 1334 164 174 1335 1336 164 1335 1336 170 175 1337 1338 175 1337 1338 125 1339 1340 125 175 1339 1340 174 175 1341 1342 174 1341 1342 177 1343 1344 1343 1344 1345 1346 178 1345 1346 177 178 1347 1348 177 1347 1348 164 181 1349 1350 181 1349 1350 179 182 1351 1352 1354 179 1351 1352 181 183 1353 1354 179 181 182 183 1351 1353 1354 183 184 1355 1356 184 1355 1356 1357 1358 184 1357 1358 184 186 1359 1360 186 1359 1360 186 1361 1362 1361 1362 178 179 1363 1364 178 1363 1364 187 188 1365 1366 188 1365 1366 189 190 1367 1368 190 1367 1368 188 190 191 1369 1370 189 190 191 1369 1370 191 192 1371 1372 192 1371 1372 1373 1374 192 1373 1374 192 1375 1376 1375 1376 190 194 1377 1378 194 1377 1378 156 1379 1380 156 194 1379 1380 194 195 1381 1382 195 1381 1382 197 198 1383 1384 3365 198 1383 1384 199 200 1385 1386 1388 200 1385 1386 198 201 1387 1388 198 199 200 201 203 206 1385 1387 1388 156 202 1389 1390 156 1389 1390 200 203 1391 1392 203 1391 1392 195 1393 1394 195 203 1393 1394 202 203 1395 1396 202 1395 1396 195 205 1397 1398 195 1397 1398 201 206 1399 1400 206 1399 1400 156 1401 1402 156 206 1401 1402 205 206 1403 1404 205 1403 1404 208 1405 1406 1405 1406 1407 1408 209 1407 1408 208 209 1409 1410 208 1409 1410 195 212 1411 1412 212 1411 1412 210 213 1413 1414 210 1413 1414 212 214 1415 1416 210 212 213 214 1415 1416 3395 214 215 1417 1418 215 1417 1418 1419 1420 215 1419 1420 215 217 
1421 1422 217 1421 1422 217 1423 1424 1423 1424 209 210 1425 1426 209 1425 1426 218 219 1427 1428 219 1427 1428 220 221 1429 1430 221 1429 1430 219 222 1431 1432 219 220 221 222 1431 1432 222 223 1433 1434 223 1433 1434 1435 1436 223 1435 1436 223 1437 1438 1437 1438 221 225 1439 1440 225 1439 1440 187 1441 1442 187 225 1441 1442 225 226 1443 1444 226 1443 1444 228 229 1445 1446 229 1445 1446 230 231 1447 1448 1450 231 1447 1448 229 232 1449 1450 229 230 231 232 1447 1449 1450 187 233 1451 1452 187 1451 1452 231 234 1453 1454 234 1453 1454 226 1455 1456 226 234 1455 1456 233 234 1457 1458 233 1457 1458 226 236 1459 1460 226 1459 1460 232 237 1461 1462 237 1461 1462 187 1463 1464 187 237 1463 1464 236 237 1465 1466 236 1465 1466 239 1467 1468 1467 1468 1469 1470 240 1469 1470 239 240 1471 1472 239 1471 1472 226 243 1473 1474 243 1473 1474 241 244 1475 1476 1478 241 1475 1476 243 245 1477 1478 241 243 244 245 1475 1477 1478 245 246 1479 1480 246 1479 1480 1481 1482 246 1481 1482 246 248 1483 1484 248 1483 1484 248 1485 1486 1485 1486 240 241 1487 1488 240 1487 1488 249 250 1489 1490 250 1489 1490 251 252 1491 1492 1494 252 1491 1492 250 253 1493 1494 250 251 252 253 1491 1493 1494 253 254 1495 1496 254 1495 1496 1497 1498 254 1497 1498 254 1499 1500 1499 1500 252 256 1501 1502 256 1501 1502 218 1503 1504 218 256 1503 1504 256 257 1505 1506 257 1505 1506 259 260 1507 1508 260 1507 1508 261 262 1509 1510 1512 262 1509 1510 260 263 1511 1512 260 261 262 263 265 268 1509 1511 1512 218 264 1513 1514 218 1513 1514 262 265 1515 1516 265 1515 1516 257 1517 1518 257 265 1517 1518 264 265 1519 1520 264 1519 1520 257 267 1521 1522 257 1521 1522 263 268 1523 1524 268 1523 1524 218 1525 1526 218 268 1525 1526 267 268 1527 1528 267 1527 1528 270 1529 1530 1529 1530 1531 1532 271 1531 1532 270 271 1533 1534 270 1533 1534 257 274 1535 1536 274 1535 1536 272 275 1537 1538 1540 272 1537 1538 274 276 1539 1540 272 274 275 276 1537 1539 1540 276 277 1541 1542 277 1541 1542 1543 1544 277 
1543 1544 277 279 1545 1546 279 1545 1546 279 1547 1548 1547 1548 271 272 1549 1550 271 1549 1550 280 281 1551 1552 281 1551 1552 282 283 1553 1554 1556 283 1553 1554 281 284 1555 1556 281 282 283 284 1553 1555 1556 284 285 1557 1558 285 1557 1558 1559 1560 285 1559 1560 285 1561 1562 1561 1562 283 287 1563 1564 287 1563 1564 249 1565 1566 249 287 1565 1566 287 288 1567 1568 288 1567 1568 290 291 1569 1570 291 1569 1570 292 293 1571 1572 1574 293 1571 1572 291 294 1573 1574 291 292 293 294 1571 1573 1574 249 295 1575 1576 249 1575 1576 293 296 1577 1578 296 1577 1578 288 1579 1580 288 296 1579 1580 295 296 1581 1582 295 1581 1582 288 298 1583 1584 288 1583 1584 294 299 1585 1586 299 1585 1586 249 1587 1588 249 299 1587 1588 298 299 1589 1590 298 1589 1590 301 1591 1592 1591 1592 1593 1594 302 1593 1594 301 302 1595 1596 301 1595 1596 288 305 1597 1598 305 1597 1598 303 306 1599 1600 1602 303 1599 1600 305 307 1601 1602 303 305 306 307 1599 1601 1602 307 308 1603 1604 308 1603 1604 1605 1606 308 1605 1606 308 310 1607 1608 310 1607 1608 310 1609 1610 1609 1610 302 303 1611 1612 302 1611 1612 311 312 1613 1614 312 1613 1614 313 314 1615 1616 1618 314 1615 1616 312 315 1617 1618 312 313 314 315 1615 1617 1618 315 316 1619 1620 316 1619 1620 1621 1622 316 1621 1622 316 1623 1624 1623 1624 314 318 1625 1626 318 1625 1626 280 1627 1628 280 318 1627 1628 318 319 1629 1630 319 1629 1630 321 322 1631 1632 322 1631 1632 323 324 1633 1634 1636 324 1633 1634 322 324 325 1635 1636 323 324 325 1633 1635 1636 280 326 1637 1638 280 1637 1638 324 327 1639 1640 327 1639 1640 319 1641 1642 319 327 1641 1642 326 327 1643 1644 326 1643 1644 319 329 1645 1646 319 1645 1646 325 330 1647 1648 330 1647 1648 280 1649 1650 280 330 1649 1650 329 330 1651 1652 329 1651 1652 332 1653 1654 1653 1654 1655 1656 333 1655 1656 332 333 1657 1658 332 1657 1658 319 336 1659 1660 336 1659 1660 334 337 1661 1662 1664 334 1661 1662 336 338 1663 1664 334 336 337 338 1661 1663 1664 338 339 1665 1666 339 
1665 1666 1667 1668 339 1667 1668 339 341 1669 1670 341 1669 1670 341 1671 1672 1671 1672 333 334 1673 1674 333 1673 1674 342 343 1675 1676 343 1675 1676 344 345 1677 1678 1680 345 1677 1678 343 346 1679 1680 343 344 345 346 1677 1679 1680 346 347 1681 1682 347 1681 1682 1683 1684 347 1683 1684 347 1685 1686 1685 1686 345 349 1687 1688 349 1687 1688 311 1689 1690 311 349 1689 1690 349 350 1691 1692 350 1691 1692 352 353 1693 1694 353 1693 1694 354 355 1695 1696 1698 355 1695 1696 353 355 356 1697 1698 354 355 356 1695 1697 1698 311 357 1699 1700 311 1699 1700 355 358 1701 1702 358 1701 1702 350 1703 1704 350 358 1703 1704 357 358 1705 1706 357 1705 1706 350 360 1707 1708 350 1707 1708 356 361 1709 1710 361 1709 1710 311 1711 1712 311 361 1711 1712 360 361 1713 1714 360 1713 1714 363 1715 1716 1715 1716 1717 1718 364 1717 1718 363 364 1719 1720 363 1719 1720 350 367 1721 1722 367 1721 1722 365 368 1723 1724 1726 365 1723 1724 367 369 1725 1726 365 367 368 369 1723 1725 1726 369 370 1727 1728 370 1727 1728 1729 1730 370 1729 1730 370 372 1731 1732 372 1731 1732 372 1733 1734 1733 1734 364 365 1735 1736 364 1735 1736 373 374 1737 1738 374 1737 1738 375 376 1739 1740 1742 376 1739 1740 374 377 1741 1742 374 375 376 377 1739 1741 1742 377 378 1743 1744 378 1743 1744 1745 1746 378 1745 1746 378 1747 1748 1747 1748 376 380 1749 1750 380 1749 1750 342 1751 1752 342 380 1751 1752 380 381 1753 1754 381 1753 1754 383 384 1755 1756 384 1755 1756 385 386 1757 1758 1760 386 1757 1758 384 386 387 1759 1760 385 386 387 1757 1759 1760 342 388 1761 1762 342 1761 1762 386 389 1763 1764 389 1763 1764 381 1765 1766 381 389 1765 1766 388 389 1767 1768 388 1767 1768 381 391 1769 1770 381 1769 1770 387 392 1771 1772 392 1771 1772 342 1773 1774 342 392 1773 1774 391 392 1775 1776 391 1775 1776 394 1777 1778 1777 1778 1779 1780 395 1779 1780 394 395 1781 1782 394 1781 1782 381 398 1783 1784 398 1783 1784 396 399 1785 1786 1788 396 1785 1786 398 400 1787 1788 396 398 399 400 1785 1787 1788 
400 401 1789 1790 401 1789 1790 1791 1792 401 1791 1792 401 403 1793 1794 403 1793 1794 403 1795 1796 1795 1796 395 396 1797 1798 395 1797 1798 404 405 1799 1800 405 1799 1800 406 407 1801 1802 1804 407 1801 1802 405 408 1803 1804 405 406 407 408 1801 1803 1804 408 409 1805 1806 409 1805 1806 1807 1808 409 1807 1808 409 1809 1810 1809 1810 407 411 1811 1812 411 1811 1812 373 1813 1814 373 411 1813 1814 411 412 1815 1816 412 1815 1816 414 415 1817 1818 415 1817 1818 416 417 1819 1820 1822 417 1819 1820 415 417 418 1821 1822 416 417 418 1819 1821 1822 373 419 1823 1824 373 1823 1824 417 420 1825 1826 420 1825 1826 412 1827 1828 412 420 1827 1828 419 420 1829 1830 419 1829 1830 412 422 1831 1832 412 1831 1832 418 423 1833 1834 423 1833 1834 373 1835 1836 373 423 1835 1836 422 423 1837 1838 422 1837 1838 425 1839 1840 1839 1840 1841 1842 426 1841 1842 425 426 1843 1844 425 1843 1844 412 429 1845 1846 429 1845 1846 427 430 1847 1848 1850 427 1847 1848 429 431 1849 1850 427 429 430 431 1847 1849 1850 431 432 1851 1852 432 1851 1852 1853 1854 432 1853 1854 432 434 1855 1856 434 1855 1856 434 1857 1858 1857 1858 426 427 1859 1860 426 1859 1860 435 436 1861 1862 436 1861 1862 437 438 1863 1864 1866 438 1863 1864 436 439 1865 1866 436 437 438 439 442 1863 1865 1866 439 440 1867 1868 440 1867 1868 1869 1870 440 1869 1870 440 1871 1872 1871 1872 438 442 1873 1874 442 1873 1874 404 1875 1876 404 442 1875 1876 442 443 1877 1878 443 1877 1878 445 446 1879 1880 446 1879 1880 447 448 1881 1882 1884 448 1881 1882 446 449 1883 1884 446 447 448 449 451 1881 1883 1884 404 450 1885 1886 404 1885 1886 448 451 1887 1888 451 1887 1888 443 1889 1890 443 451 1889 1890 450 451 1891 1892 450 1891 1892 443 453 1893 1894 443 1893 1894 449 454 1895 1896 454 1895 1896 404 1897 1898 404 454 1897 1898 453 454 1899 1900 453 1899 1900 456 1901 1902 1901 1902 1903 1904 457 1903 1904 456 457 1905 1906 456 1905 1906 443 460 1907 1908 460 1907 1908 458 461 1909 1910 1912 458 1909 1910 460 462 1911 1912 
458 460 461 462 1909 1911 1912 462 463 1913 1914 463 1913 1914 1915 1916 463 1915 1916 463 465 1917 1918 465 1917 1918 465 1919 1920 1919 1920 457 458 1921 1922 457 1921 1922 466 467 1923 1924 467 1923 1924 468 469 1925 1926 1928 469 1925 1926 467 470 1927 1928 467 468 469 470 473 1925 1927 1928 470 471 1929 1930 471 1929 1930 1931 1932 471 1931 1932 471 1933 1934 1933 1934 469 473 1935 1936 473 1935 1936 435 1937 1938 435 473 1937 1938 473 474 1939 1940 474 1939 1940 476 477 1941 1942 477 1941 1942 478 479 1943 1944 1946 479 1943 1944 477 479 480 1945 1946 478 479 480 1943 1945 1946 435 481 1947 1948 435 1947 1948 479 482 1949 1950 482 1949 1950 474 1951 1952 474 482 1951 1952 481 482 1953 1954 481 1953 1954 474 484 1955 1956 474 1955 1956 480 485 1957 1958 485 1957 1958 435 1959 1960 435 485 1959 1960 484 485 1961 1962 484 1961 1962 487 1963 1964 1963 1964 1965 1966 488 1965 1966 487 488 1967 1968 487 1967 1968 474 491 1969 1970 491 1969 1970 489 492 1971 1972 1974 489 1971 1972 491 493 1973 1974 489 491 492 493 1971 1973 1974 3953 493 494 1975 1976 494 1975 1976 1977 1978 494 1977 1978 494 496 1979 1980 496 1979 1980 496 1981 1982 1981 1982 488 489 1983 1984 488 1983 1984 497 498 1985 1986 498 1985 1986 499 500 1987 1988 1990 500 1987 1988 498 501 1989 1990 498 499 500 501 504 1987 1989 1990 501 502 1991 1992 502 1991 1992 1993 1994 502 1993 1994 502 1995 1996 1995 1996 500 504 1997 1998 504 1997 1998 466 1999 2000 466 504 1999 2000 504 505 2001 2002 505 2001 2002 507 508 2003 2004 508 2003 2004 509 510 2005 2006 2008 510 2005 2006 508 510 511 2007 2008 509 510 511 2005 2007 2008 466 512 2009 2010 466 2009 2010 510 513 2011 2012 513 2011 2012 505 2013 2014 505 513 2013 2014 512 513 2015 2016 512 2015 2016 505 515 2017 2018 505 2017 2018 511 516 2019 2020 516 2019 2020 466 2021 2022 466 516 2021 2022 515 516 2023 2024 515 2023 2024 518 2025 2026 2025 2026 2027 2028 519 2027 2028 518 519 2029 2030 518 2029 2030 505 522 2031 2032 522 2031 2032 520 523 2033 2034 
2036 520 2033 2034 522 524 2035 2036 520 522 523 524 2033 2035 2036 524 525 2037 2038 525 2037 2038 2039 2040 525 2039 2040 525 527 2041 2042 527 2041 2042 527 2043 2044 2043 2044 519 520 2045 2046 519 2045 2046 528 529 2047 2048 529 2047 2048 530 531 2049 2050 2052 531 2049 2050 529 532 2051 2052 529 530 531 532 2049 2051 2052 532 533 2053 2054 533 2053 2054 2055 2056 533 2055 2056 533 2057 2058 2057 2058 531 535 2059 2060 535 2059 2060 497 2061 2062 497 535 2061 2062 535 536 2063 2064 536 2063 2064 538 539 2065 2066 539 2065 2066 540 541 2067 2068 2070 541 2067 2068 539 542 2069 2070 539 540 541 542 2067 2069 2070 497 543 2071 2072 497 2071 2072 541 544 2073 2074 544 2073 2074 536 2075 2076 536 544 2075 2076 543 544 2077 2078 543 2077 2078 536 546 2079 2080 536 2079 2080 542 547 2081 2082 547 2081 2082 497 2083 2084 497 547 2083 2084 546 547 2085 2086 546 2085 2086 549 2087 2088 2087 2088 2089 2090 550 2089 2090 549 550 2091 2092 549 2091 2092 536 553 2093 2094 553 2093 2094 551 554 2095 2096 2098 551 2095 2096 553 555 2097 2098 551 553 554 555 2095 2097 2098 555 556 2099 2100 556 2099 2100 2101 2102 556 2101 2102 556 558 2103 2104 558 2103 2104 558 2105 2106 2105 2106 550 551 2107 2108 550 2107 2108 559 560 2109 2110 560 2109 2110 561 562 2111 2112 2114 562 2111 2112 560 563 2113 2114 560 561 562 563 566 2111 2113 2114 563 564 2115 2116 564 2115 2116 2117 2118 564 2117 2118 564 2119 2120 2119 2120 562 566 2121 2122 566 2121 2122 528 2123 2124 528 566 2123 2124 566 567 2125 2126 567 2125 2126 569 570 2127 2128 570 2127 2128 571 572 2129 2130 2132 572 2129 2130 570 572 573 2131 2132 571 572 573 2129 2131 2132 528 574 2133 2134 528 2133 2134 572 575 2135 2136 575 2135 2136 567 2137 2138 567 575 2137 2138 574 575 2139 2140 574 2139 2140 567 577 2141 2142 567 2141 2142 573 578 2143 2144 578 2143 2144 528 2145 2146 528 578 2145 2146 577 578 2147 2148 577 2147 2148 580 2149 2150 2149 2150 2151 2152 581 2151 2152 580 581 2153 2154 580 2153 2154 567 584 2155 2156 584 
2155 2156 582 585 2157 2158 2160 582 2157 2158 584 586 2159 2160 582 584 585 586 2157 2159 2160 586 587 2161 2162 587 2161 2162 2163 2164 587 2163 2164 587 589 2165 2166 589 2165 2166 589 2167 2168 2167 2168 581 582 2169 2170 581 2169 2170 590 591 2171 2172 591 2171 2172 592 593 2173 2174 2176 593 2173 2174 591 594 2175 2176 591 592 593 594 2173 2175 2176 594 595 2177 2178 595 2177 2178 2179 2180 595 2179 2180 595 2181 2182 2181 2182 593 597 2183 2184 597 2183 2184 559 2185 2186 559 597 2185 2186 597 598 2187 2188 598 2187 2188 600 601 2189 2190 601 2189 2190 602 603 2191 2192 2194 603 2191 2192 601 603 604 2193 2194 602 603 604 2191 2193 2194 559 605 2195 2196 559 2195 2196 603 606 2197 2198 606 2197 2198 598 2199 2200 598 606 2199 2200 605 606 2201 2202 605 2201 2202 598 608 2203 2204 598 2203 2204 604 609 2205 2206 609 2205 2206 559 2207 2208 559 609 2207 2208 608 609 2209 2210 608 2209 2210 611 2211 2212 2211 2212 2213 2214 612 2213 2214 611 612 2215 2216 611 2215 2216 598 615 2217 2218 615 2217 2218 613 616 2219 2220 2222 613 2219 2220 615 617 2221 2222 613 615 616 617 2219 2221 2222 617 618 2223 2224 618 2223 2224 2225 2226 618 2225 2226 618 620 2227 2228 620 2227 2228 620 2229 2230 2229 2230 612 613 2231 2232 612 2231 2232 621 622 2233 2234 622 2233 2234 623 624 2235 2236 2238 624 2235 2236 622 624 625 2237 2238 623 624 625 2235 2237 2238 625 626 2239 2240 626 2239 2240 2241 2242 626 2241 2242 626 2243 2244 2243 2244 624 628 2245 2246 628 2245 2246 590 2247 2248 590 628 2247 2248 628 629 2249 2250 629 2249 2250 631 632 2251 2252 4233 632 2251 2252 633 634 2253 2254 2256 634 2253 2254 632 634 635 2255 2256 633 634 635 2253 2255 2256 590 636 2257 2258 590 2257 2258 634 637 2259 2260 637 2259 2260 629 2261 2262 629 637 2261 2262 636 637 2263 2264 636 2263 2264 629 639 2265 2266 629 2265 2266 635 640 2267 2268 640 2267 2268 590 2269 2270 590 640 2269 2270 639 640 2271 2272 639 2271 2272 642 2273 2274 2273 2274 2275 2276 643 2275 2276 642 643 2277 2278 4258 642 
2277 2278 629 646 2279 2280 646 2279 2280 644 647 2281 2282 2284 644 2281 2282 646 648 2283 2284 644 646 647 648 2281 2283 2284 648 649 2285 2286 649 2285 2286 2287 2288 649 2287 2288 649 651 2289 2290 4271 651 2289 2290 651 2291 2292 2291 2292 643 644 2293 2294 643 2293 2294 652 653 2295 2296 653 2295 2296 654 655 2297 2298 2300 655 2297 2298 653 655 656 2299 2300 654 655 656 2297 2299 2300 656 657 2301 2302 657 2301 2302 2303 2304 657 2303 2304 657 2305 2306 2305 2306 655 659 2307 2308 659 2307 2308 621 2309 2310 621 659 2309 2310 659 660 2311 2312 660 2311 2312 662 663 2313 2314 4295 663 2313 2314 664 665 2315 2316 2318 665 2315 2316 663 665 666 2317 2318 664 665 666 2315 2317 2318 621 667 2319 2320 621 2319 2320 665 668 2321 2322 668 2321 2322 660 2323 2324 660 668 2323 2324 667 668 2325 2326 667 2325 2326 660 670 2327 2328 660 2327 2328 666 671 2329 2330 671 2329 2330 621 2331 2332 621 671 2331 2332 670 671 2333 2334 670 2333 2334 673 2335 2336 2335 2336 2337 2338 674 2337 2338 673 674 2339 2340 673 2339 2340 660 677 2341 2342 677 2341 2342 675 678 2343 2344 2346 675 2343 2344 677 679 2345 2346 675 677 678 679 2343 2345 2346 679 680 2347 2348 680 2347 2348 2349 2350 680 2349 2350 680 682 2351 2352 682 2351 2352 682 2353 2354 2353 2354 674 675 2355 2356 674 2355 2356 683 684 2357 2358 684 2357 2358 685 686 2359 2360 2362 686 2359 2360 684 686 687 2361 2362 685 686 687 2359 2361 2362 687 688 2363 2364 4347 688 2363 2364 2365 2366 688 2365 2366 688 2367 2368 2367 2368 686 690 2369 2370 690 2369 2370 652 2371 2372 652 690 2371 2372 690 691 2373 2374 691 2373 2374 693 694 2375 2376 4357 694 2375 2376 695 696 2377 2378 2380 696 2377 2378 694 697 2379 2380 694 695 696 697 699 702 2377 2379 2380 652 698 2381 2382 652 2381 2382 696 699 2383 2384 699 2383 2384 691 2385 2386 691 699 2385 2386 698 699 2387 2388 698 2387 2388 691 701 2389 2390 691 2389 2390 697 702 2391 2392 702 2391 2392 652 2393 2394 652 702 2393 2394 701 702 2395 2396 701 2395 2396 704 2397 2398 2397 
2398 2399 2400 705 2399 2400 704 705 2401 2402 4399 704 2401 2402 691 708 2403 2404 708 2403 2404 706 709 2405 2406 2408 706 2405 2406 708 710 2407 2408 706 708 709 710 2405 2407 2408 710 711 2409 2410 711 2409 2410 2411 2412 711 2411 2412 711 713 2413 2414 4394 713 2413 2414 713 2415 2416 2415 2416 705 706 2417 2418 705 2417 2418 714 715 2419 2420 715 2419 2420 716 717 2421 2422 2424 717 2421 2422 715 717 718 2423 2424 716 717 718 2421 2423 2424 718 719 2425 2426 719 2425 2426 2427 2428 719 2427 2428 719 2429 2430 2429 2430 717 721 2431 2432 721 2431 2432 683 2433 2434 683 721 2433 2434 721 722 2435 2436 722 2435 2436 724 725 2437 2438 4419 725 2437 2438 726 727 2439 2440 2442 727 2439 2440 725 728 2441 2442 725 726 727 728 2439 2441 2442 683 729 2443 2444 683 2443 2444 727 730 2445 2446 730 2445 2446 722 2447 2448 722 730 2447 2448 729 730 2449 2450 729 2449 2450 722 732 2451 2452 722 2451 2452 728 733 2453 2454 733 2453 2454 683 2455 2456 683 733 2455 2456 732 733 2457 2458 732 2457 2458 735 2459 2460 2459 2460 2461 2462 736 2461 2462 735 736 2463 2464 735 2463 2464 722 739 2465 2466 739 2465 2466 737 740 2467 2468 2470 737 2467 2468 739 741 2469 2470 737 739 740 741 2467 2469 2470 741 742 2471 2472 742 2471 2472 2473 2474 742 2473 2474 742 744 2475 2476 744 2475 2476 744 2477 2478 2477 2478 736 737 2479 2480 736 2479 2480 745 746 2481 2482 746 2481 2482 747 748 2483 2484 2486 748 2483 2484 746 748 749 2485 2486 747 748 749 2483 2485 2486 749 750 2487 2488 750 2487 2488 2489 2490 750 2489 2490 750 2491 2492 2491 2492 748 752 2493 2494 752 2493 2494 714 2495 2496 714 752 2495 2496 752 753 2497 2498 753 2497 2498 755 756 2499 2500 4481 756 2499 2500 757 758 2501 2502 2504 758 2501 2502 756 758 759 2503 2504 757 758 759 2501 2503 2504 714 760 2505 2506 714 2505 2506 758 761 2507 2508 761 2507 2508 753 2509 2510 753 761 2509 2510 760 761 2511 2512 760 2511 2512 753 763 2513 2514 753 2513 2514 759 764 2515 2516 764 2515 2516 714 2517 2518 714 764 2517 2518 763 764 
2519 2520 763 2519 2520 766 2521 2522 2521 2522 2523 2524 767 2523 2524 766 767 2525 2526 4523 766 2525 2526 753 770 2527 2528 770 2527 2528 768 771 2529 2530 2532 768 2529 2530 770 772 2531 2532 768 770 771 772 2529 2531 2532 772 773 2533 2534 773 2533 2534 2535 2536 773 2535 2536 773 775 2537 2538 4519 775 2537 2538 775 2539 2540 2539 2540 767 768 2541 2542 4503 767 2541 2542 776 777 2543 2544 777 2543 2544 778 779 2545 2546 2548 779 2545 2546 777 780 2547 2548 777 778 779 780 783 2545 2547 2548 780 781 2549 2550 781 2549 2550 2551 2552 781 2551 2552 781 2553 2554 2553 2554 779 783 2555 2556 783 2555 2556 745 2557 2558 745 783 2557 2558 783 784 2559 2560 784 2559 2560 786 787 2561 2562 4543 787 2561 2562 788 789 2563 2564 2566 789 2563 2564 787 789 790 2565 2566 788 789 790 2563 2565 2566 745 791 2567 2568 745 2567 2568 789 792 2569 2570 792 2569 2570 784 2571 2572 784 792 2571 2572 791 792 2573 2574 791 2573 2574 784 794 2575 2576 784 2575 2576 790 795 2577 2578 795 2577 2578 745 2579 2580 745 795 2579 2580 794 795 2581 2582 794 2581 2582 797 2583 2584 2583 2584 2585 2586 798 2585 2586 797 798 2587 2588 797 2587 2588 784 801 2589 2590 801 2589 2590 799 802 2591 2592 2594 799 2591 2592 801 803 2593 2594 799 801 802 803 2591 2593 2594 803 804 2595 2596 804 2595 2596 2597 2598 804 2597 2598 804 806 2599 2600 806 2599 2600 806 2601 2602 2601 2602 798 799 2603 2604 798 2603 2604 807 808 2605 2606 808 2605 2606 809 810 2607 2608 2610 810 2607 2608 808 811 2609 2610 808 809 810 811 814 2607 2609 2610 811 812 2611 2612 812 2611 2612 2613 2614 812 2613 2614 812 2615 2616 2615 2616 810 814 2617 2618 814 2617 2618 776 2619 2620 776 814 2619 2620 814 815 2621 2622 815 2621 2622 817 818 2623 2624 4605 818 2623 2624 819 820 2625 2626 2628 820 2625 2626 818 820 821 2627 2628 819 820 821 2625 2627 2628 776 822 2629 2630 776 2629 2630 820 823 2631 2632 823 2631 2632 815 2633 2634 815 823 2633 2634 822 823 2635 2636 822 2635 2636 815 825 2637 2638 815 2637 2638 821 826 2639 2640 
826 2639 2640 776 2641 2642 776 826 2641 2642 825 826 2643 2644 825 2643 2644 828 2645 2646 2645 2646 2647 2648 829 2647 2648 828 829 2649 2650 828 2649 2650 815 832 2651 2652 832 2651 2652 830 833 2653 2654 2656 830 2653 2654 832 834 2655 2656 830 832 833 834 2653 2655 2656 834 835 2657 2658 835 2657 2658 2659 2660 835 2659 2660 835 837 2661 2662 837 2661 2662 837 2663 2664 2663 2664 829 830 2665 2666 829 2665 2666 838 839 2667 2668 839 2667 2668 840 841 2669 2670 2672 841 2669 2670 839 842 2671 2672 839 840 841 842 845 2669 2671 2672 842 843 2673 2674 843 2673 2674 2675 2676 843 2675 2676 843 2677 2678 2677 2678 841 845 2679 2680 845 2679 2680 807 2681 2682 807 845 2681 2682 845 846 2683 2684 846 2683 2684 848 849 2685 2686 849 2685 2686 850 851 2687 2688 2690 851 2687 2688 849 852 2689 2690 849 850 851 852 854 2687 2689 2690 807 853 2691 2692 807 2691 2692 851 854 2693 2694 854 2693 2694 846 2695 2696 846 854 2695 2696 853 854 2697 2698 853 2697 2698 846 856 2699 2700 846 2699 2700 852 857 2701 2702 857 2701 2702 807 2703 2704 807 857 2703 2704 856 857 2705 2706 856 2705 2706 859 2707 2708 2707 2708 2709 2710 860 2709 2710 859 860 2711 2712 859 2711 2712 846 863 2713 2714 863 2713 2714 861 864 2715 2716 2718 861 2715 2716 863 865 2717 2718 861 863 864 865 2715 2717 2718 865 866 2719 2720 866 2719 2720 2721 2722 866 2721 2722 866 868 2723 2724 868 2723 2724 868 2725 2726 2725 2726 860 861 2727 2728 860 2727 2728 869 870 2729 2730 870 2729 2730 871 872 2731 2732 2734 872 2731 2732 870 873 2733 2734 870 871 872 873 2731 2733 2734 873 874 2735 2736 874 2735 2736 2737 2738 874 2737 2738 874 2739 2740 2739 2740 872 876 2741 2742 876 2741 2742 838 2743 2744 838 876 2743 2744 876 877 2745 2746 877 2745 2746 879 880 2747 2748 880 2747 2748 881 882 2749 2750 2752 882 2749 2750 880 882 883 2751 2752 881 882 883 2749 2751 2752 838 884 2753 2754 838 2753 2754 882 885 2755 2756 885 2755 2756 877 2757 2758 877 885 2757 2758 884 885 2759 2760 884 2759 2760 877 887 2761 2762 877 
2761 2762 883 888 2763 2764 888 2763 2764 838 2765 2766 838 888 2765 2766 887 888 2767 2768 887 2767 2768 890 2769 2770 2769 2770 2771 2772 891 2771 2772 890 891 2773 2774 890 2773 2774 877 894 2775 2776 894 2775 2776 892 895 2777 2778 2780 892 2777 2778 894 896 2779 2780 892 894 895 896 2777 2779 2780 896 897 2781 2782 897 2781 2782 2783 2784 897 2783 2784 897 899 2785 2786 899 2785 2786 899 2787 2788 2787 2788 891 892 2789 2790 891 2789 2790 900 901 2791 2792 901 2791 2792 902 903 2793 2794 2796 903 2793 2794 901 904 2795 2796 901 902 903 904 907 2793 2795 2796 904 905 2797 2798 905 2797 2798 2799 2800 905 2799 2800 905 2801 2802 2801 2802 903 907 2803 2804 907 2803 2804 869 2805 2806 869 907 2805 2806 907 908 2807 2808 908 2807 2808 910 911 2809 2810 911 2809 2810 912 913 2811 2812 2814 913 2811 2812 911 913 914 2813 2814 912 913 914 2811 2813 2814 869 915 2815 2816 869 2815 2816 913 916 2817 2818 916 2817 2818 908 2819 2820 908 916 2819 2820 915 916 2821 2822 915 2821 2822 908 918 2823 2824 908 2823 2824 914 919 2825 2826 919 2825 2826 869 2827 2828 869 919 2827 2828 918 919 2829 2830 918 2829 2830 921 2831 2832 2831 2832 2833 2834 922 2833 2834 921 922 2835 2836 921 2835 2836 908 925 2837 2838 925 2837 2838 923 926 2839 2840 2842 923 2839 2840 925 927 2841 2842 923 925 926 927 2839 2841 2842 927 928 2843 2844 928 2843 2844 2845 2846 928 2845 2846 928 930 2847 2848 930 2847 2848 930 2849 2850 2849 2850 922 923 2851 2852 922 2851 2852 931 932 2853 2854 932 2853 2854 933 934 2855 2856 2858 934 2855 2856 932 935 2857 2858 932 933 934 935 2855 2857 2858 935 936 2859 2860 936 2859 2860 2861 2862 936 2861 2862 936 2863 2864 2863 2864 934 938 2865 2866 938 2865 2866 900 2867 2868 900 938 2867 2868 938 939 2869 2870 939 2869 2870 941 942 2871 2872 942 2871 2872 943 944 2873 2874 2876 944 2873 2874 942 944 945 2875 2876 943 944 945 2873 2875 2876 900 946 2877 2878 900 2877 2878 944 947 2879 2880 947 2879 2880 939 2881 2882 939 947 2881 2882 946 947 2883 2884 946 2883 
2884 939 949 2885 2886 939 2885 2886 945 950 2887 2888 950 2887 2888 900 2889 2890 900 950 2889 2890 949 950 2891 2892 949 2891 2892 952 2893 2894 2893 2894 2895 2896 953 2895 2896 952 953 2897 2898 952 2897 2898 939 956 2899 2900 956 2899 2900 954 957 2901 2902 2904 954 2901 2902 956 958 2903 2904 954 956 957 958 2901 2903 2904 958 959 2905 2906 959 2905 2906 2907 2908 959 2907 2908 959 961 2909 2910 961 2909 2910 961 2911 2912 2911 2912 953 954 2913 2914 953 2913 2914 962 963 2915 2916 4899 963 2915 2916 964 965 2917 2918 2920 965 2917 2918 963 965 966 2919 2920 964 965 966 2917 2919 2920 966 967 2921 2922 967 2921 2922 2923 2924 967 2923 2924 967 2925 2926 2925 2926 965 969 2927 2928 969 2927 2928 931 2929 2930 931 969 2929 2930 969 970 2931 2932 970 2931 2932 972 973 2933 2934 4915 973 2933 2934 974 975 2935 2936 2938 975 2935 2936 973 975 976 2937 2938 974 975 976 2935 2937 2938 931 977 2939 2940 931 2939 2940 975 978 2941 2942 978 2941 2942 970 2943 2944 970 978 2943 2944 977 978 2945 2946 977 2945 2946 970 980 2947 2948 970 2947 2948 976 981 2949 2950 981 2949 2950 931 2951 2952 931 981 2951 2952 980 981 2953 2954 980 2953 2954 983 2955 2956 2955 2956 2957 2958 984 2957 2958 983 984 2959 2960 983 2959 2960 970 987 2961 2962 987 2961 2962 985 988 2963 2964 2966 985 2963 2964 987 989 2965 2966 985 987 988 989 2963 2965 2966 989 990 2967 2968 990 2967 2968 2969 2970 990 2969 2970 990 992 2971 2972 992 2971 2972 992 2973 2974 2973 2974 984 985 2975 2976 984 2975 2976 1 2 2977 2978 2 2977 2978 2 4 2979 2980 4 2979 2980 5 2981 2982 2 5 2981 2982 5 6 999 2983 2984 6 2983 2984 6 2985 2986 7 2985 2986 7 2987 2988 2987 2988 4 8 2989 2990 8 2989 2990 9 10 2991 2992 3038 9 2991 2992 11 12 1011 2993 2994 12 2993 2994 12 14 2995 2996 14 2995 2996 15 2997 2998 12 15 2997 2998 14 17 2999 3000 17 2999 3000 9 17 3001 3002 9 17 18 3001 3002 3003 16 17 18 3002 3003 3004 16 3003 3004 16 3005 3006 3005 3006 15 20 3007 3008 20 3007 3008 20 3009 3010 20 21 3009 3010 3011 19 20 21 
3010 3011 3012 19 3011 3012 9 19 3013 3014 9 3013 3014 23 24 3015 3016 23 3015 3016 23 3017 3018 23 25 3017 3018 3035 22 3019 3020 3019 3020 9 26 3021 3022 26 3021 3022 24 26 3023 3024 24 3023 3024 28 3025 3026 26 28 3025 3026 28 29 3027 3028 29 3027 3028 29 3029 3030 29 30 31 3029 3030 30 31 3031 3032 31 3031 3032 31 3033 3034 3033 3034 22 23 25 3018 3035 3036 22 3035 3036 8 3037 3038 8 9 10 2991 3037 3038 32 33 3039 3040 33 3039 3040 33 35 3041 3042 35 3041 3042 36 3043 3044 33 36 3043 3044 36 37 3045 3046 37 3045 3046 37 3047 3048 38 3047 3048 38 3049 3050 3049 3050 35 39 3051 3052 39 3051 3052 40 41 3053 3054 3100 40 3053 3054 42 43 3055 3056 43 3055 3056 43 45 3057 3058 45 3057 3058 46 3059 3060 43 46 3059 3060 45 48 3061 3062 48 3061 3062 40 48 3063 3064 1 40 47 48 49 3063 3064 3065 47 49 3064 3065 3066 47 3065 3066 1 47 3067 3068 1 3067 3068 46 51 3069 3070 51 3069 3070 1 50 51 3071 3072 1 50 52 3071 3072 3073 50 52 3072 3073 3074 50 3073 3074 40 50 3075 3076 40 3075 3076 54 55 3077 3078 54 3077 3078 54 3079 3080 53 54 56 3079 3080 53 3081 3082 3081 3082 40 57 3083 3084 57 3083 3084 55 57 3085 3086 55 3085 3086 59 3087 3088 57 59 3087 3088 59 60 3089 3090 60 3089 3090 60 3091 3092 60 61 3091 3092 3093 60 61 62 3092 3093 3094 62 3093 3094 62 3095 3096 3095 3096 53 56 3097 3098 53 3097 3098 1 39 40 3099 3100 1 40 41 3053 3099 3100 63 64 3101 3102 64 3101 3102 64 66 3103 3104 66 3103 3104 67 3105 3106 64 67 3105 3106 67 68 3107 3108 68 3107 3108 68 3109 3110 69 3109 3110 69 3111 3112 3111 3112 66 70 3113 3114 70 3113 3114 71 72 3115 3116 3162 71 3115 3116 73 74 3117 3118 74 3117 3118 74 76 3119 3120 76 3119 3120 77 3121 3122 74 77 3121 3122 76 79 3123 3124 79 3123 3124 71 78 79 3125 3126 71 78 80 3125 3126 3127 78 80 3126 3127 3128 78 3127 3128 32 78 3129 3130 32 3129 3130 77 82 3131 3132 82 3131 3132 32 82 3133 3134 32 71 81 82 83 3133 3134 3135 81 83 3134 3135 3136 81 3135 3136 71 81 3137 3138 71 3137 3138 85 86 3139 3140 85 3139 3140 85 3141 3142 85 87 3141 
3142 3159 84 3143 3144 3143 3144 71 88 3145 3146 88 3145 3146 86 88 3147 3148 86 3147 3148 90 3149 3150 88 90 3149 3150 90 91 3151 3152 91 3151 3152 91 3153 3154 91 92 3153 3154 3155 91 92 93 3154 3155 3156 93 3155 3156 93 3157 3158 3157 3158 84 85 87 3142 3159 3160 84 3159 3160 32 70 3161 3162 32 70 71 72 3115 3161 3162 94 95 3163 3164 95 3163 3164 95 97 3165 3166 97 3165 3166 98 3167 3168 95 98 3167 3168 98 99 3169 3170 99 3169 3170 99 3171 3172 100 3171 3172 100 3173 3174 3173 3174 97 101 3175 3176 101 3175 3176 102 103 3177 3178 3224 102 3177 3178 104 105 3179 3180 105 3179 3180 105 107 3181 3182 107 3181 3182 108 3183 3184 105 108 3183 3184 107 110 3185 3186 110 3185 3186 102 109 110 3187 3188 102 109 111 3187 3188 3189 109 111 3188 3189 3190 109 3189 3190 63 109 3191 3192 63 3191 3192 108 113 3193 3194 113 3193 3194 63 113 3195 3196 63 102 112 113 114 3195 3196 3197 112 114 3196 3197 3198 112 3197 3198 102 112 3199 3200 102 3199 3200 116 117 3201 3202 116 3201 3202 116 3203 3204 116 118 3203 3204 3221 115 3205 3206 3205 3206 102 119 3207 3208 119 3207 3208 117 119 3209 3210 117 3209 3210 121 3211 3212 119 121 3211 3212 121 122 3213 3214 122 3213 3214 122 3215 3216 122 123 124 3215 3216 123 124 3217 3218 124 3217 3218 124 3219 3220 3219 3220 115 116 118 3204 3221 3222 115 3221 3222 63 101 102 3223 3224 63 102 103 3177 3223 3224 125 126 3225 3226 126 3225 3226 126 128 3227 3228 128 3227 3228 129 3229 3230 126 129 3229 3230 129 130 3231 3232 130 3231 3232 130 3233 3234 131 3233 3234 131 3235 3236 3235 3236 128 132 3237 3238 132 3237 3238 133 134 3239 3240 3286 133 3239 3240 135 136 1259 3241 3242 136 3241 3242 136 138 3243 3244 138 3243 3244 139 3245 3246 136 139 3245 3246 138 141 3247 3248 141 3247 3248 133 140 141 3249 3250 133 140 142 3249 3250 3251 140 142 3250 3251 3252 140 3251 3252 94 140 3253 3254 94 3253 3254 139 144 3255 3256 144 3255 3256 94 143 144 3257 3258 94 143 145 3257 3258 3259 143 145 3258 3259 3260 143 3259 3260 133 143 3261 3262 133 3261 
3262 147 148 3263 3264 147 3263 3264 147 3265 3266 146 147 149 3265 3266 146 3267 3268 3267 3268 133 150 3269 3270 150 3269 3270 148 150 3271 3272 148 3271 3272 152 3273 3274 150 152 3273 3274 152 153 3275 3276 153 3275 3276 153 3277 3278 153 154 3277 3278 3279 153 154 155 3278 3279 3280 155 3279 3280 155 3281 3282 3281 3282 146 149 3283 3284 146 3283 3284 94 132 133 3285 3286 94 133 134 3239 3285 3286 156 157 3287 3288 157 3287 3288 157 159 3289 3290 159 3289 3290 160 3291 3292 157 160 3291 3292 160 161 3293 3294 161 3293 3294 161 3295 3296 162 3295 3296 162 3297 3298 3297 3298 159 163 3299 3300 163 3299 3300 164 165 3301 3302 3348 164 3301 3302 166 167 1321 3303 3304 167 3303 3304 167 169 3305 3306 169 3305 3306 170 3307 3308 167 170 3307 3308 169 172 3309 3310 172 3309 3310 164 172 3311 3312 164 171 172 173 3311 3312 3313 171 173 3312 3313 3314 171 3313 3314 125 171 3315 3316 125 3315 3316 170 175 3317 3318 175 3317 3318 125 174 175 3319 3320 125 174 176 3319 3320 3321 174 176 3320 3321 3322 174 3321 3322 164 174 3323 3324 164 3323 3324 178 179 3325 3326 178 3325 3326 178 3327 3328 178 180 3327 3328 3345 177 3329 3330 3329 3330 164 181 3331 3332 181 3331 3332 179 181 3333 3334 179 3333 3334 183 3335 3336 181 183 3335 3336 183 184 3337 3338 184 3337 3338 184 3339 3340 184 185 3339 3340 3341 184 185 186 3340 3341 3342 186 3341 3342 186 3343 3344 3343 3344 177 178 180 3328 3345 3346 177 3345 3346 125 163 164 3347 3348 125 164 165 3301 3347 3348 187 188 3349 3350 188 3349 3350 188 190 3351 3352 190 3351 3352 191 3353 3354 188 191 3353 3354 191 192 3355 3356 192 3355 3356 192 3357 3358 193 3357 3358 193 3359 3360 3359 3360 190 194 3361 3362 194 3361 3362 195 196 3363 3364 3410 195 3363 3364 197 198 1383 3365 3366 198 3365 3366 198 200 3367 3368 200 3367 3368 201 3369 3370 198 201 3369 3370 200 203 3371 3372 203 3371 3372 195 202 203 3373 3374 195 202 204 3373 3374 3375 202 204 3374 3375 3376 202 3375 3376 156 202 3377 3378 156 3377 3378 201 206 3379 3380 206 3379 
3380 156 205 206 3381 3382 156 205 207 3381 3382 3383 205 207 3382 3383 3384 205 3383 3384 195 205 3385 3386 195 3385 3386 209 210 3387 3388 209 3387 3388 209 3389 3390 208 209 211 3389 3390 208 3391 3392 3391 3392 195 212 3393 3394 212 3393 3394 210 212 1416 3395 3396 210 3395 3396 214 3397 3398 212 214 3397 3398 214 215 3399 3400 215 3399 3400 215 3401 3402 215 216 3401 3402 3403 215 216 217 3402 3403 3404 217 3403 3404 217 3405 3406 3405 3406 208 211 3407 3408 208 3407 3408 156 194 195 3409 3410 156 195 196 3363 3409 3410 218 219 3411 3412 219 3411 3412 219 221 3413 3414 221 3413 3414 222 3415 3416 219 222 3415 3416 222 223 3417 3418 223 3417 3418 223 3419 3420 224 3419 3420 224 3421 3422 3421 3422 221 225 3423 3424 225 3423 3424 226 227 3425 3426 3472 226 3425 3426 228 229 3427 3428 229 3427 3428 229 231 3429 3430 231 3429 3430 232 3431 3432 229 232 3431 3432 231 234 3433 3434 234 3433 3434 226 234 3435 3436 187 226 233 234 235 3435 3436 3437 233 235 3436 3437 3438 233 3437 3438 187 233 3439 3440 187 3439 3440 232 237 3441 3442 237 3441 3442 187 237 3443 3444 187 226 236 237 238 3443 3444 3445 236 238 3444 3445 3446 236 3445 3446 226 236 3447 3448 226 3447 3448 240 241 3449 3450 240 3449 3450 240 3451 3452 240 242 3451 3452 3469 239 3453 3454 3453 3454 226 243 3455 3456 243 3455 3456 241 243 3457 3458 241 3457 3458 245 3459 3460 243 245 3459 3460 245 246 3461 3462 246 3461 3462 246 3463 3464 246 247 3463 3464 3465 246 247 248 3464 3465 3466 248 3465 3466 248 3467 3468 3467 3468 239 240 242 3452 3469 3470 239 3469 3470 187 225 226 3471 3472 187 226 227 3425 3471 3472 249 250 3473 3474 250 3473 3474 250 252 3475 3476 252 3475 3476 253 3477 3478 250 253 3477 3478 253 254 3479 3480 254 3479 3480 254 3481 3482 255 3481 3482 255 3483 3484 3483 3484 252 256 3485 3486 256 3485 3486 257 258 3487 3488 3534 257 3487 3488 259 260 3489 3490 260 3489 3490 260 262 3491 3492 262 3491 3492 263 3493 3494 260 263 3493 3494 262 265 3495 3496 265 3495 3496 257 265 3497 3498 218 257 
264 265 266 3497 3498 3499 264 266 3498 3499 3500 264 3499 3500 218 264 3501 3502 218 3501 3502 263 268 3503 3504 268 3503 3504 218 268 3505 3506 218 257 267 268 269 3505 3506 3507 267 269 3506 3507 3508 267 3507 3508 257 267 3509 3510 257 3509 3510 271 272 3511 3512 271 3511 3512 271 3513 3514 271 273 3513 3514 3531 270 3515 3516 3515 3516 257 274 3517 3518 274 3517 3518 272 274 3519 3520 272 3519 3520 276 3521 3522 274 276 3521 3522 276 277 3523 3524 277 3523 3524 277 3525 3526 277 278 3525 3526 3527 277 278 279 3526 3527 3528 279 3527 3528 279 3529 3530 3529 3530 270 271 273 3514 3531 3532 270 3531 3532 218 256 3533 3534 218 256 257 258 3487 3533 3534 280 281 3535 3536 281 3535 3536 281 283 3537 3538 283 3537 3538 284 3539 3540 281 284 3539 3540 284 285 3541 3542 285 3541 3542 285 3543 3544 286 3543 3544 286 3545 3546 3545 3546 283 287 3547 3548 287 3547 3548 288 289 3549 3550 3596 288 3549 3550 290 291 3551 3552 291 3551 3552 291 293 3553 3554 293 3553 3554 294 3555 3556 291 294 3555 3556 293 296 3557 3558 296 3557 3558 288 295 296 3559 3560 288 295 297 3559 3560 3561 295 297 3560 3561 3562 295 3561 3562 249 295 3563 3564 249 3563 3564 294 299 3565 3566 299 3565 3566 249 298 299 3567 3568 249 298 300 3567 3568 3569 298 300 3568 3569 3570 298 3569 3570 288 298 3571 3572 288 3571 3572 302 303 3573 3574 302 3573 3574 302 3575 3576 302 304 3575 3576 3593 301 3577 3578 3577 3578 288 305 3579 3580 305 3579 3580 303 305 3581 3582 303 3581 3582 307 3583 3584 305 307 3583 3584 307 308 3585 3586 308 3585 3586 308 3587 3588 308 309 3587 3588 3589 308 309 310 3588 3589 3590 310 3589 3590 310 3591 3592 3591 3592 301 302 304 3576 3593 3594 301 3593 3594 249 287 288 3595 3596 249 288 289 3549 3595 3596 311 312 3597 3598 312 3597 3598 312 314 3599 3600 314 3599 3600 315 3601 3602 312 315 3601 3602 315 316 3603 3604 316 3603 3604 316 3605 3606 317 3605 3606 317 3607 3608 3607 3608 314 318 3609 3610 318 3609 3610 319 320 3611 3612 3658 319 3611 3612 321 322 3613 3614 322 3613 
3614 322 324 3615 3616 324 3615 3616 325 3617 3618 322 325 3617 3618 324 327 3619 3620 327 3619 3620 319 326 327 3621 3622 319 326 328 3621 3622 3623 326 328 3622 3623 3624 326 3623 3624 280 326 3625 3626 280 3625 3626 325 330 3627 3628 330 3627 3628 280 329 330 3629 3630 280 329 331 3629 3630 3631 329 331 3630 3631 3632 329 3631 3632 319 329 3633 3634 319 3633 3634 333 334 3635 3636 333 3635 3636 333 3637 3638 333 335 3637 3638 3655 332 3639 3640 3639 3640 319 336 3641 3642 336 3641 3642 334 336 3643 3644 334 3643 3644 338 3645 3646 336 338 3645 3646 338 339 3647 3648 339 3647 3648 339 3649 3650 339 340 341 3649 3650 340 341 3651 3652 341 3651 3652 341 3653 3654 3653 3654 332 333 335 3638 3655 3656 332 3655 3656 280 318 319 3657 3658 280 319 320 3611 3657 3658 342 343 3659 3660 343 3659 3660 343 345 3661 3662 345 3661 3662 346 3663 3664 343 346 3663 3664 346 347 3665 3666 347 3665 3666 347 3667 3668 348 3667 3668 348 3669 3670 3669 3670 345 349 3671 3672 349 3671 3672 350 351 3673 3674 3720 350 3673 3674 352 353 3675 3676 353 3675 3676 353 355 3677 3678 355 3677 3678 356 3679 3680 353 356 3679 3680 355 358 3681 3682 358 3681 3682 350 357 358 3683 3684 350 357 359 3683 3684 3685 357 359 3684 3685 3686 357 3685 3686 311 357 3687 3688 311 3687 3688 356 361 3689 3690 361 3689 3690 311 360 361 3691 3692 311 360 362 3691 3692 3693 360 362 3692 3693 3694 360 3693 3694 350 360 3695 3696 350 3695 3696 364 365 3697 3698 364 3697 3698 364 3699 3700 364 366 3699 3700 3717 363 3701 3702 3701 3702 350 367 3703 3704 367 3703 3704 365 367 3705 3706 365 3705 3706 369 3707 3708 367 369 3707 3708 369 370 3709 3710 370 3709 3710 370 3711 3712 370 371 3711 3712 3713 370 371 372 3712 3713 3714 372 3713 3714 372 3715 3716 3715 3716 363 364 366 3700 3717 3718 363 3717 3718 311 349 350 3719 3720 311 350 351 3673 3719 3720 373 374 3721 3722 374 3721 3722 374 376 3723 3724 376 3723 3724 377 3725 3726 374 377 3725 3726 377 378 3727 3728 378 3727 3728 378 3729 3730 379 3729 3730 379 3731 3732 
3731 3732 376 380 3733 3734 380 3733 3734 381 382 3735 3736 3782 381 3735 3736 383 384 3737 3738 384 3737 3738 384 386 3739 3740 386 3739 3740 387 3741 3742 384 387 3741 3742 386 389 3743 3744 389 3743 3744 381 388 389 3745 3746 381 388 390 3745 3746 3747 388 390 3746 3747 3748 388 3747 3748 342 388 3749 3750 342 3749 3750 387 392 3751 3752 392 3751 3752 342 392 3753 3754 342 391 392 393 3753 3754 3755 391 393 3754 3755 3756 391 3755 3756 381 391 3757 3758 381 3757 3758 395 396 3759 3760 395 3759 3760 395 3761 3762 395 397 3761 3762 3779 394 3763 3764 3763 3764 381 398 3765 3766 398 3765 3766 396 398 3767 3768 396 3767 3768 400 3769 3770 398 400 3769 3770 400 401 3771 3772 401 3771 3772 401 3773 3774 401 402 3773 3774 3775 401 402 403 3774 3775 3776 403 3775 3776 403 3777 3778 3777 3778 394 395 397 3762 3779 3780 394 3779 3780 342 380 3781 3782 342 380 381 382 3735 3781 3782 404 405 3783 3784 405 3783 3784 405 407 3785 3786 407 3785 3786 408 3787 3788 405 408 3787 3788 408 409 3789 3790 409 3789 3790 409 3791 3792 410 3791 3792 410 3793 3794 3793 3794 407 411 3795 3796 411 3795 3796 412 413 3797 3798 3844 412 3797 3798 414 415 3799 3800 415 3799 3800 415 417 3801 3802 417 3801 3802 418 3803 3804 415 418 3803 3804 417 420 3805 3806 420 3805 3806 412 419 420 3807 3808 412 419 421 3807 3808 3809 419 421 3808 3809 3810 419 3809 3810 373 419 3811 3812 373 3811 3812 418 423 3813 3814 423 3813 3814 373 423 3815 3816 373 412 422 423 424 3815 3816 3817 422 424 3816 3817 3818 422 3817 3818 412 422 3819 3820 412 3819 3820 426 427 3821 3822 426 3821 3822 426 3823 3824 426 428 3823 3824 3841 425 3825 3826 3825 3826 412 429 3827 3828 429 3827 3828 427 429 3829 3830 427 3829 3830 431 3831 3832 429 431 3831 3832 431 432 3833 3834 432 3833 3834 432 3835 3836 432 433 3835 3836 3837 432 433 434 3836 3837 3838 434 3837 3838 434 3839 3840 3839 3840 425 426 428 3824 3841 3842 425 3841 3842 373 411 412 3843 3844 373 412 413 3797 3843 3844 435 436 3845 3846 436 3845 3846 436 438 3847 3848 
438 3847 3848 439 3849 3850 436 439 3849 3850 439 440 3851 3852 440 3851 3852 440 3853 3854 441 3853 3854 441 3855 3856 3855 3856 438 442 3857 3858 442 3857 3858 443 444 3859 3860 3906 443 3859 3860 445 446 3861 3862 446 3861 3862 446 448 3863 3864 448 3863 3864 449 3865 3866 446 449 3865 3866 448 451 3867 3868 451 3867 3868 443 451 3869 3870 404 443 450 451 452 3869 3870 3871 450 452 3870 3871 3872 450 3871 3872 404 450 3873 3874 404 3873 3874 449 454 3875 3876 454 3875 3876 404 453 454 3877 3878 404 453 455 3877 3878 3879 453 455 3878 3879 3880 453 3879 3880 443 453 3881 3882 443 3881 3882 457 458 3883 3884 457 3883 3884 457 3885 3886 457 459 3885 3886 3903 456 3887 3888 3887 3888 443 460 3889 3890 460 3889 3890 458 460 3891 3892 458 3891 3892 462 3893 3894 460 462 3893 3894 462 463 3895 3896 463 3895 3896 463 3897 3898 463 464 465 3897 3898 464 465 3899 3900 465 3899 3900 465 3901 3902 3901 3902 456 457 459 3886 3903 3904 456 3903 3904 404 442 443 3905 3906 404 443 444 3859 3905 3906 466 467 3907 3908 467 3907 3908 467 469 3909 3910 469 3909 3910 470 3911 3912 467 470 3911 3912 470 471 3913 3914 471 3913 3914 471 3915 3916 472 3915 3916 472 3917 3918 3917 3918 469 473 3919 3920 473 3919 3920 474 475 3921 3922 3968 474 3921 3922 476 477 3923 3924 477 3923 3924 477 479 3925 3926 479 3925 3926 480 3927 3928 477 480 3927 3928 479 482 3929 3930 482 3929 3930 474 482 3931 3932 474 481 482 483 3931 3932 3933 481 483 3932 3933 3934 481 3933 3934 435 481 3935 3936 435 3935 3936 480 485 3937 3938 485 3937 3938 435 485 3939 3940 435 474 484 485 486 3939 3940 3941 484 486 3940 3941 3942 484 3941 3942 474 484 3943 3944 474 3943 3944 488 489 3945 3946 488 3945 3946 488 3947 3948 487 488 490 3947 3948 487 3949 3950 3949 3950 474 491 3951 3952 491 3951 3952 489 491 1974 3953 3954 489 3953 3954 493 3955 3956 491 493 3955 3956 493 494 3957 3958 494 3957 3958 494 3959 3960 494 495 3959 3960 3961 494 495 496 3960 3961 3962 496 3961 3962 496 3963 3964 3963 3964 487 490 3965 3966 487 
3965 3966 435 473 474 3967 3968 435 474 475 3921 3967 3968 497 498 3969 3970 498 3969 3970 498 500 3971 3972 500 3971 3972 501 3973 3974 498 501 3973 3974 501 502 3975 3976 502 3975 3976 502 3977 3978 503 3977 3978 503 3979 3980 3979 3980 500 504 3981 3982 504 3981 3982 505 506 3983 3984 4030 505 3983 3984 507 508 3985 3986 508 3985 3986 508 510 3987 3988 510 3987 3988 511 3989 3990 508 511 3989 3990 510 513 3991 3992 513 3991 3992 505 512 513 3993 3994 505 512 514 3993 3994 3995 512 514 3994 3995 3996 512 3995 3996 466 512 3997 3998 466 3997 3998 511 516 3999 4000 516 3999 4000 466 516 4001 4002 466 515 516 517 4001 4002 4003 515 517 4002 4003 4004 515 4003 4004 505 515 4005 4006 505 4005 4006 519 520 4007 4008 519 4007 4008 519 4009 4010 519 521 4009 4010 4027 518 4011 4012 4011 4012 505 522 4013 4014 522 4013 4014 520 522 4015 4016 520 4015 4016 524 4017 4018 522 524 4017 4018 524 525 4019 4020 525 4019 4020 525 4021 4022 525 526 4021 4022 4023 525 526 527 4022 4023 4024 527 4023 4024 527 4025 4026 4025 4026 518 519 521 4010 4027 4028 518 4027 4028 466 504 4029 4030 466 504 505 506 3983 4029 4030 528 529 4031 4032 529 4031 4032 529 531 4033 4034 531 4033 4034 532 4035 4036 529 532 4035 4036 532 533 4037 4038 533 4037 4038 533 4039 4040 534 4039 4040 534 4041 4042 4041 4042 531 535 4043 4044 535 4043 4044 536 537 4045 4046 4092 536 4045 4046 538 539 4047 4048 539 4047 4048 539 541 4049 4050 541 4049 4050 542 4051 4052 539 542 4051 4052 541 544 4053 4054 544 4053 4054 536 544 4055 4056 536 543 544 545 4055 4056 4057 543 545 4056 4057 4058 543 4057 4058 497 543 4059 4060 497 4059 4060 542 547 4061 4062 547 4061 4062 497 547 4063 4064 497 546 547 548 4063 4064 4065 546 548 4064 4065 4066 546 4065 4066 536 546 4067 4068 536 4067 4068 550 551 4069 4070 550 4069 4070 550 4071 4072 550 552 4071 4072 4089 549 4073 4074 4073 4074 536 553 4075 4076 553 4075 4076 551 553 4077 4078 551 4077 4078 555 4079 4080 553 555 4079 4080 555 556 4081 4082 556 4081 4082 556 4083 4084 
556 557 4083 4084 4085 556 557 558 4084 4085 4086 558 4085 4086 558 4087 4088 4087 4088 549 550 552 4072 4089 4090 549 4089 4090 497 535 4091 4092 497 535 536 537 4045 4091 4092 559 560 4093 4094 560 4093 4094 560 562 4095 4096 562 4095 4096 563 4097 4098 560 563 4097 4098 563 564 4099 4100 564 4099 4100 564 4101 4102 565 4101 4102 565 4103 4104 4103 4104 562 566 4105 4106 566 4105 4106 567 568 4107 4108 4154 567 4107 4108 569 570 4109 4110 570 4109 4110 570 572 4111 4112 572 4111 4112 573 4113 4114 570 573 4113 4114 572 575 4115 4116 575 4115 4116 567 574 575 4117 4118 567 574 576 4117 4118 4119 574 576 4118 4119 4120 574 4119 4120 528 574 4121 4122 528 4121 4122 573 578 4123 4124 578 4123 4124 528 577 578 4125 4126 528 577 579 4125 4126 4127 577 579 4126 4127 4128 577 4127 4128 567 577 4129 4130 567 4129 4130 581 582 4131 4132 581 4131 4132 581 4133 4134 581 583 4133 4134 4151 580 4135 4136 4135 4136 567 584 4137 4138 584 4137 4138 582 584 4139 4140 582 4139 4140 586 4141 4142 584 586 4141 4142 586 587 4143 4144 587 4143 4144 587 4145 4146 587 588 4145 4146 4147 587 588 589 4146 4147 4148 589 4147 4148 589 4149 4150 4149 4150 580 581 583 4134 4151 4152 580 4151 4152 528 566 567 4153 4154 528 567 568 4107 4153 4154 590 591 4155 4156 591 4155 4156 591 593 4157 4158 593 4157 4158 594 4159 4160 591 594 4159 4160 594 595 4161 4162 595 4161 4162 595 4163 4164 596 4163 4164 596 4165 4166 4165 4166 593 597 4167 4168 597 4167 4168 598 599 4169 4170 4216 598 4169 4170 600 601 4171 4172 601 4171 4172 601 603 4173 4174 603 4173 4174 604 4175 4176 601 604 4175 4176 603 606 4177 4178 606 4177 4178 598 606 4179 4180 559 598 605 606 607 4179 4180 4181 605 607 4180 4181 4182 605 4181 4182 559 605 4183 4184 559 4183 4184 604 609 4185 4186 609 4185 4186 559 609 4187 4188 559 608 609 610 4187 4188 4189 608 610 4188 4189 4190 608 4189 4190 598 608 4191 4192 598 4191 4192 612 613 4193 4194 612 4193 4194 612 614 4195 4196 614 4195 4196 611 4197 4198 4197 4198 598 615 4199 4200 615 4199 
4200 613 615 4201 4202 613 4201 4202 617 4203 4204 615 617 4203 4204 617 618 4205 4206 618 4205 4206 618 619 4207 4208 619 4207 4208 618 619 620 4209 4210 620 4209 4210 620 4211 4212 4211 4212 611 612 614 4213 4214 611 4213 4214 559 597 4215 4216 559 597 598 599 4169 4215 4216 621 622 4217 4218 622 4217 4218 622 624 4219 4220 624 4219 4220 625 4221 4222 622 625 4221 4222 625 626 4223 4224 626 4223 4224 626 4225 4226 627 4225 4226 627 4227 4228 4227 4228 624 628 4229 4230 628 4229 4230 629 630 4231 4232 4278 629 4231 4232 631 632 2251 4233 4234 632 4233 4234 632 634 4235 4236 634 4235 4236 635 4237 4238 632 635 4237 4238 634 637 4239 4240 637 4239 4240 629 636 637 4241 4242 629 636 638 4241 4242 636 638 4243 4244 636 4243 4244 590 636 4245 4246 590 4245 4246 635 640 4247 4248 640 4247 4248 590 640 4249 4250 590 639 640 641 4249 4250 639 641 4251 4252 639 4251 4252 629 639 4253 4254 629 4253 4254 643 644 4255 4256 643 4255 4256 643 4257 4258 642 643 645 2277 4257 4258 642 4259 4260 4259 4260 629 646 4261 4262 646 4261 4262 644 646 4263 4264 644 4263 4264 648 4265 4266 646 648 4265 4266 648 649 4267 4268 649 4267 4268 649 650 4269 4270 650 4269 4270 649 650 651 2289 4271 4272 651 4271 4272 651 4273 4274 4273 4274 642 645 4275 4276 642 4275 4276 590 628 4277 4278 590 628 629 630 4231 4277 4278 652 653 4279 4280 653 4279 4280 653 655 4281 4282 655 4281 4282 656 4283 4284 653 656 4283 4284 656 657 4285 4286 657 4285 4286 657 4287 4288 658 4287 4288 658 4289 4290 4289 4290 655 659 4291 4292 659 4291 4292 660 661 4293 4294 4340 660 4293 4294 662 663 2313 4295 4296 663 4295 4296 663 665 4297 4298 665 4297 4298 666 4299 4300 663 666 4299 4300 665 668 4301 4302 668 4301 4302 660 667 668 4303 4304 660 667 669 4303 4304 667 669 4305 4306 667 4305 4306 621 667 4307 4308 621 4307 4308 666 671 4309 4310 671 4309 4310 621 671 4311 4312 621 670 671 672 4311 4312 670 672 4313 4314 670 4313 4314 660 670 4315 4316 660 4315 4316 674 675 4317 4318 674 4317 4318 674 676 4319 4320 676 4319 
4320 673 4321 4322 4321 4322 660 677 4323 4324 677 4323 4324 675 677 4325 4326 675 4325 4326 679 4327 4328 677 679 4327 4328 679 680 4329 4330 680 4329 4330 680 681 4331 4332 681 4331 4332 680 681 682 4333 4334 682 4333 4334 682 4335 4336 4335 4336 673 674 676 4337 4338 673 4337 4338 621 659 660 4339 4340 621 660 661 4293 4339 4340 683 684 4341 4342 684 4341 4342 684 686 4343 4344 686 4343 4344 687 4345 4346 684 687 4345 4346 687 688 2363 4347 4348 688 4347 4348 688 4349 4350 689 4349 4350 689 4351 4352 4351 4352 686 690 4353 4354 690 4353 4354 691 692 4355 4356 4402 691 4355 4356 693 694 2375 4357 4358 694 4357 4358 694 696 4359 4360 696 4359 4360 697 4361 4362 694 697 4361 4362 696 699 4363 4364 699 4363 4364 691 699 4365 4366 691 698 699 700 4365 4366 698 700 4367 4368 698 4367 4368 652 698 4369 4370 652 4369 4370 697 702 4371 4372 702 4371 4372 652 702 4373 4374 652 701 702 703 4373 4374 701 703 4375 4376 701 4375 4376 691 701 4377 4378 691 4377 4378 705 706 4379 4380 705 4379 4380 705 707 4381 4382 707 4381 4382 704 4383 4384 4383 4384 691 708 4385 4386 708 4385 4386 706 708 4387 4388 706 4387 4388 710 4389 4390 708 710 4389 4390 710 711 4391 4392 711 4391 4392 711 4393 4394 711 712 713 2413 4393 4394 712 713 4395 4396 713 4395 4396 713 4397 4398 4397 4398 704 705 707 2401 4399 4400 704 4399 4400 652 690 691 4401 4402 652 691 692 4355 4401 4402 714 715 4403 4404 715 4403 4404 715 717 4405 4406 717 4405 4406 718 4407 4408 715 718 4407 4408 718 719 4409 4410 719 4409 4410 719 4411 4412 720 4411 4412 720 4413 4414 4413 4414 717 721 4415 4416 721 4415 4416 722 723 4417 4418 4464 722 4417 4418 724 725 2437 4419 4420 725 4419 4420 725 727 4421 4422 727 4421 4422 728 4423 4424 725 728 4423 4424 727 730 4425 4426 730 4425 4426 722 729 730 4427 4428 722 729 731 4427 4428 729 731 4429 4430 729 4429 4430 683 729 4431 4432 683 4431 4432 728 733 4433 4434 733 4433 4434 683 733 4435 4436 683 732 733 734 4435 4436 732 734 4437 4438 732 4437 4438 722 732 4439 4440 722 4439 
4440 736 737 4441 4442 736 4441 4442 736 738 4443 4444 738 4443 4444 735 4445 4446 4445 4446 722 739 4447 4448 739 4447 4448 737 739 4449 4450 737 4449 4450 741 4451 4452 739 741 4451 4452 741 742 4453 4454 742 4453 4454 742 743 4455 4456 743 4455 4456 742 743 744 4457 4458 744 4457 4458 744 4459 4460 4459 4460 735 736 738 4461 4462 735 4461 4462 683 721 4463 4464 683 721 722 723 4417 4463 4464 745 746 4465 4466 746 4465 4466 746 748 4467 4468 748 4467 4468 749 4469 4470 746 749 4469 4470 749 750 4471 4472 750 4471 4472 750 4473 4474 751 4473 4474 751 4475 4476 4475 4476 748 752 4477 4478 752 4477 4478 753 754 4479 4480 4526 753 4479 4480 755 756 2499 4481 4482 756 4481 4482 756 758 4483 4484 758 4483 4484 759 4485 4486 756 759 4485 4486 758 761 4487 4488 761 4487 4488 753 761 4489 4490 753 760 761 762 4489 4490 760 762 4491 4492 760 4491 4492 714 760 4493 4494 714 4493 4494 759 764 4495 4496 764 4495 4496 714 763 764 4497 4498 714 763 765 4497 4498 4499 763 765 4498 4499 4500 763 4499 4500 753 763 4501 4502 753 4501 4502 767 768 2541 4503 4504 767 4503 4504 767 769 4505 4506 769 4505 4506 766 4507 4508 4507 4508 753 770 4509 4510 770 4509 4510 768 770 4511 4512 768 4511 4512 772 4513 4514 770 772 4513 4514 772 773 4515 4516 773 4515 4516 773 774 4517 4518 774 4517 4518 773 774 775 2537 4519 4520 775 4519 4520 775 4521 4522 4521 4522 766 767 769 2525 4523 4524 766 4523 4524 714 752 753 4525 4526 714 753 754 4479 4525 4526 776 777 4527 4528 777 4527 4528 777 779 4529 4530 779 4529 4530 780 4531 4532 777 780 4531 4532 780 781 4533 4534 781 4533 4534 781 4535 4536 782 4535 4536 782 4537 4538 4537 4538 779 783 4539 4540 783 4539 4540 784 785 4541 4542 4588 784 4541 4542 786 787 2561 4543 4544 787 4543 4544 787 789 4545 4546 789 4545 4546 790 4547 4548 787 790 4547 4548 789 792 4549 4550 792 4549 4550 784 792 4551 4552 745 784 791 792 793 4551 4552 791 793 4553 4554 791 4553 4554 745 791 4555 4556 745 4555 4556 790 795 4557 4558 795 4557 4558 745 795 4559 4560 745 784 
794 795 796 4559 4560 794 796 4561 4562 794 4561 4562 784 794 4563 4564 784 4563 4564 798 799 4565 4566 798 4565 4566 797 798 4567 4568 797 800 4567 4568 797 4569 4570 4569 4570 784 801 4571 4572 801 4571 4572 799 801 4573 4574 799 4573 4574 803 4575 4576 801 803 4575 4576 803 804 4577 4578 804 4577 4578 804 805 4579 4580 805 4579 4580 805 806 4581 4582 806 4581 4582 806 4583 4584 4583 4584 797 800 4585 4586 797 4585 4586 745 783 4587 4588 745 783 784 785 4541 4587 4588 807 808 4589 4590 808 4589 4590 808 810 4591 4592 810 4591 4592 811 4593 4594 808 811 4593 4594 811 812 4595 4596 812 4595 4596 812 4597 4598 813 4597 4598 813 4599 4600 4599 4600 810 814 4601 4602 814 4601 4602 815 816 4603 4604 4650 815 4603 4604 817 818 2623 4605 4606 818 4605 4606 818 820 4607 4608 820 4607 4608 821 4609 4610 818 821 4609 4610 820 823 4611 4612 823 4611 4612 815 823 4613 4614 776 815 822 823 824 4613 4614 4615 822 824 4614 4615 4616 822 4615 4616 776 822 4617 4618 776 4617 4618 821 826 4619 4620 826 4619 4620 776 826 4621 4622 776 815 825 826 827 4621 4622 4623 825 827 4622 4623 4624 825 4623 4624 815 825 4625 4626 815 4625 4626 829 830 4627 4628 829 4627 4628 829 831 4629 4630 831 4629 4630 828 4631 4632 4631 4632 815 832 4633 4634 832 4633 4634 830 832 4635 4636 830 4635 4636 834 4637 4638 832 834 4637 4638 834 835 4639 4640 835 4639 4640 835 836 4641 4642 836 4641 4642 836 837 4643 4644 837 4643 4644 837 4645 4646 4645 4646 828 831 4647 4648 828 4647 4648 776 814 4649 4650 776 814 815 816 4603 4649 4650 838 839 4651 4652 839 4651 4652 839 841 4653 4654 841 4653 4654 842 4655 4656 839 842 4655 4656 842 843 4657 4658 843 4657 4658 843 4659 4660 844 4659 4660 844 4661 4662 4661 4662 841 845 4663 4664 845 4663 4664 846 847 4665 4666 846 4665 4666 848 849 4667 4668 849 4667 4668 849 851 4669 4670 851 4669 4670 852 4671 4672 849 852 4671 4672 851 854 4673 4674 854 4673 4674 846 854 4675 4676 807 846 853 854 855 4675 4676 4677 853 855 4676 4677 4678 853 4677 4678 807 853 4679 4680 
807 4679 4680 852 857 4681 4682 857 4681 4682 807 857 4683 4684 807 846 856 857 858 4683 4684 856 858 4685 4686 856 4685 4686 846 856 4687 4688 846 4687 4688 860 861 4689 4690 860 4689 4690 860 862 4691 4692 862 4691 4692 859 4693 4694 4693 4694 846 863 4695 4696 863 4695 4696 861 863 4697 4698 861 4697 4698 865 4699 4700 863 865 4699 4700 865 866 4701 4702 866 4701 4702 866 868 4703 4704 867 868 4703 4704 867 868 4705 4706 868 4705 4706 868 4707 4708 4707 4708 859 862 4709 4710 859 4709 4710 807 845 4711 4712 807 845 846 847 4711 4712 869 870 4713 4714 870 4713 4714 870 872 4715 4716 872 4715 4716 873 4717 4718 870 873 4717 4718 873 874 4719 4720 874 4719 4720 874 4721 4722 875 4721 4722 875 4723 4724 4723 4724 872 876 4725 4726 876 4725 4726 877 878 4727 4728 4774 877 4727 4728 879 880 4729 4730 880 4729 4730 880 882 4731 4732 882 4731 4732 883 4733 4734 880 883 4733 4734 882 885 4735 4736 885 4735 4736 877 885 4737 4738 838 877 884 885 886 4737 4738 4739 884 886 4738 4739 4740 884 4739 4740 838 884 4741 4742 838 4741 4742 883 888 4743 4744 888 4743 4744 838 888 4745 4746 838 887 888 889 4745 4746 4747 887 889 4746 4747 4748 887 4747 4748 877 887 4749 4750 877 4749 4750 891 892 4751 4752 891 4751 4752 891 893 4753 4754 893 4753 4754 890 4755 4756 4755 4756 877 894 4757 4758 894 4757 4758 892 894 4759 4760 892 4759 4760 896 4761 4762 894 896 4761 4762 896 897 4763 4764 897 4763 4764 897 898 4765 4766 898 4765 4766 898 899 4767 4768 899 4767 4768 899 4769 4770 4769 4770 890 893 4771 4772 890 4771 4772 838 876 4773 4774 838 876 877 878 4727 4773 4774 900 901 4775 4776 901 4775 4776 901 903 4777 4778 903 4777 4778 904 4779 4780 901 904 4779 4780 904 905 4781 4782 905 4781 4782 905 4783 4784 906 4783 4784 906 4785 4786 4785 4786 903 907 4787 4788 907 4787 4788 908 909 4789 4790 908 4789 4790 910 911 4791 4792 911 4791 4792 911 913 4793 4794 913 4793 4794 914 4795 4796 911 914 4795 4796 913 916 4797 4798 916 4797 4798 908 916 4799 4800 869 908 915 916 917 4799 4800 
4801 915 917 4800 4801 4802 915 4801 4802 869 915 4803 4804 869 4803 4804 914 919 4805 4806 919 4805 4806 869 919 4807 4808 869 908 918 919 920 4807 4808 918 920 4809 4810 918 4809 4810 908 918 4811 4812 908 4811 4812 922 923 4813 4814 922 4813 4814 922 924 4815 4816 924 4815 4816 921 4817 4818 4817 4818 908 925 4819 4820 925 4819 4820 923 925 4821 4822 923 4821 4822 927 4823 4824 925 927 4823 4824 927 928 4825 4826 928 4825 4826 928 930 4827 4828 929 930 4827 4828 929 930 4829 4830 930 4829 4830 930 4831 4832 4831 4832 921 924 4833 4834 921 4833 4834 869 907 908 4835 4836 869 908 909 4835 4836 931 932 4837 4838 932 4837 4838 932 934 4839 4840 934 4839 4840 935 4841 4842 932 935 4841 4842 935 936 4843 4844 936 4843 4844 936 4845 4846 937 4845 4846 937 4847 4848 4847 4848 934 938 4849 4850 938 4849 4850 939 940 4851 4852 939 4851 4852 941 942 4853 4854 942 4853 4854 942 944 4855 4856 944 4855 4856 945 4857 4858 942 945 4857 4858 944 947 4859 4860 947 4859 4860 939 947 4861 4862 900 939 946 947 948 4861 4862 4863 946 948 4862 4863 4864 946 4863 4864 900 946 4865 4866 900 4865 4866 945 950 4867 4868 950 4867 4868 900 950 4869 4870 900 939 949 950 951 4869 4870 949 951 4871 4872 949 4871 4872 939 949 4873 4874 939 4873 4874 953 954 4875 4876 953 4875 4876 953 955 4877 4878 955 4877 4878 952 4879 4880 4879 4880 939 956 4881 4882 956 4881 4882 954 956 4883 4884 954 4883 4884 958 4885 4886 956 958 4885 4886 958 959 4887 4888 959 4887 4888 959 961 4889 4890 960 961 4889 4890 960 961 4891 4892 961 4891 4892 961 4893 4894 4893 4894 952 955 4895 4896 952 4895 4896 900 938 939 4897 4898 900 939 940 4897 4898 962 963 2915 4899 4900 963 4899 4900 963 965 4901 4902 965 4901 4902 966 4903 4904 963 966 4903 4904 966 967 4905 4906 967 4905 4906 967 4907 4908 968 4907 4908 968 4909 4910 4909 4910 965 969 4911 4912 969 4911 4912 970 971 4913 4914 4960 970 4913 4914 972 973 2933 4915 4916 973 4915 4916 973 975 4917 4918 975 4917 4918 976 4919 4920 973 976 4919 4920 975 978 4921 4922 
978 4921 4922 970 977 978 4923 4924 970 977 979 4923 4924 977 979 4925 4926 977 4925 4926 931 977 4927 4928 931 4927 4928 976 981 4929 4930 981 4929 4930 931 980 981 4931 4932 931 980 982 4931 4932 980 982 4933 4934 980 4933 4934 970 980 4935 4936 970 4935 4936 984 985 4937 4938 984 4937 4938 983 984 4939 4940 983 986 4939 4940 983 4941 4942 4941 4942 970 987 4943 4944 987 4943 4944 985 987 4945 4946 985 4945 4946 989 4947 4948 987 989 4947 4948 989 990 4949 4950 990 4949 4950 990 991 4951 4952 991 4951 4952 991 992 4953 4954 992 4953 4954 992 4955 4956 4955 4956 983 986 4957 4958 983 4957 4958 931 969 970 4959 4960 931 970 971 4913 4959 4960 0.320886418015886993E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435047786080177985E-03 -0.435050661596638019E-03 -0.435015088300818990E-03 -0.435084759117159022E-03 -0.435058330687056996E-03 -0.435040431898216988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138286756523262001E-03 -0.140229438392076996E-03 -0.138900356244381991E-03 -0.139615838670957006E-03 -0.138288251646271991E-03 -0.140227943269067006E-03 0.000000000000000000E+00 0.419834067892811968E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305217256013E-03 -0.161424187158655996E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435071410144034006E-03 -0.435067418468867008E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.255994304135462002E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980740464878987E-03 -0.435120576273734998E-03 0.000000000000000000E+00 
-0.666666666666666970E-02 -0.138286719937541003E-03 -0.140229474977798997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253690283189994988E-01 -0.434980401777350985E-03 -0.435120927891995981E-03 -0.666666666666666970E-02 -0.140229499401465997E-03 -0.138286695513873000E-03 -0.166666666666667011E-01 0.319795589003930014E-01 -0.161423181596791996E-03 -0.161424310779120013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435063425289485017E-03 -0.435075493914430017E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.320997920666244024E-01 0.000000000000000000E+00 -0.161423317051876997E-03 -0.161424175324035012E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435064354014662996E-03 -0.435074543616996011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.292675967555632993E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423746720073988E-03 -0.161423745655837994E-03 -0.161424013317901000E-03 -0.161423479058011009E-03 -0.161423479016896003E-03 -0.161424013359016006E-03 -0.666666666666666970E-02 -0.434770925530010013E-03 -0.435374516178396991E-03 -0.434923678631289005E-03 -0.435218350961854000E-03 -0.434767192010144024E-03 -0.435378332843076009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.420929664672890988E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014954239530986E-03 -0.435084898922045006E-03 -0.666666666666666970E-02 -0.138286666946142001E-03 -0.140229527969196996E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255503077544010984E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423360613601998E-03 -0.161424131762310011E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435068344066269984E-03 -0.435070463673477021E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.252823531939237005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423118685809013E-03 -0.161424373690102996E-03 -0.666666666666666970E-02 -0.139258124485676013E-03 -0.139258070429663011E-03 -0.166666666666667011E-01 0.255489774258116989E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423314542717992E-03 -0.161424177833194993E-03 -0.434708406678657993E-03 -0.435438495815256975E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322022752306447974E-01 -0.435029928828667000E-03 -0.435069283946130990E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138272576530863001E-03 -0.140243618384475996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.258515188587684006E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435046874781375994E-03 -0.435051611957959993E-03 -0.138907408932722987E-03 -0.139608785982616010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320997478880679990E-01 -0.161423315884339007E-03 -0.161424176491573002E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435068970788089987E-03 -0.435069822829565989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255013626659516997E-01 0.000000000000000000E+00 
-0.666666666666666970E-02 -0.161778512187199996E-03 -0.161068980188712013E-03 -0.166666666666667011E-01 -0.430721544738902977E-03 -0.439524019069752994E-03 0.000000000000000000E+00 0.319967450902398018E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423540386620003E-03 -0.161423951989293009E-03 -0.436779536615916999E-03 -0.433396525451669977E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257464062516243007E-01 0.000000000000000000E+00 -0.357123043190452993E-03 -0.513335165827105049E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138849239149727987E-03 -0.139666955765612013E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421600788316065025E-01 0.000000000000000000E+00 -0.438009740604286991E-03 -0.432217430813423007E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.110342284848130999E-03 -0.562588969540806947E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.250247725514408000E-01 0.000000000000000000E+00 0.312521876302675970E-04 -0.718933610019259007E-03 -0.666666666666666970E-02 -0.143947962448449994E-03 -0.134568232466890006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.321784029946046990E-01 0.000000000000000000E+00 -0.131393250908101988E-03 -0.556288171480889002E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.344954211373242007E-03 -0.526364720145119946E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252170876990303987E-01 -0.161423598610935996E-03 -0.161423893764976013E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435940683599852022E-03 
-0.434217221119004976E-03 -0.166666666666667011E-01 0.320886759388818998E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435055736276135005E-03 -0.435042919782540022E-03 -0.435014872722787977E-03 -0.435084983920160974E-03 -0.435084944266096979E-03 -0.435014910749576022E-03 -0.166666666666667011E-01 -0.138286772670705003E-03 -0.140229422244634997E-03 -0.138288005601589994E-03 -0.140228189313749003E-03 -0.138900291039652002E-03 -0.139615903875686995E-03 0.000000000000000000E+00 0.419834016282712025E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305078950007E-03 -0.161424187296962002E-03 -0.666666666666666970E-02 -0.435072265974590982E-03 -0.435066581504923002E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255995473964411996E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434977413290588025E-03 -0.435124045681951985E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286961247241009E-03 -0.140229233668097989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.253627459848576008E-01 0.000000000000000000E+00 -0.434982364929778985E-03 -0.435118882445508014E-03 -0.666666666666666970E-02 -0.140242815655179008E-03 -0.138273379260159989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319681677869403033E-01 -0.161422464571142990E-03 -0.161425027804768992E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.434576582527966993E-03 -0.435573489949336987E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.320997527624522980E-01 0.000000000000000000E+00 -0.161423316033646006E-03 -0.161424176342266003E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435072976730357019E-03 -0.435065886383583010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.292687587508236001E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423746234547004E-03 -0.161423746141366008E-03 -0.161424006944943987E-03 -0.161423485430967995E-03 -0.161423485376057003E-03 -0.161424006999856008E-03 -0.666666666666666970E-02 -0.435021380261554999E-03 -0.435118476044887013E-03 -0.434889017803224000E-03 -0.435253786460061004E-03 -0.434968358559784986E-03 -0.435172681171430026E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929989771356011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014809641986002E-03 -0.435085049702760990E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286733578440991E-03 -0.140229461336898007E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255502685417309007E-01 0.000000000000000000E+00 -0.161423359690042998E-03 -0.161424132685869011E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435070746654247023E-03 -0.435068067322774026E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.252823730509023001E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423118749988009E-03 -0.161424373625924000E-03 -0.666666666666666970E-02 -0.139258062928016002E-03 -0.139258131987322995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255489363703280993E-01 
0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313440402000E-03 -0.161424178935510009E-03 0.000000000000000000E+00 -0.434870514603341021E-03 -0.435272746258683974E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322040616077563027E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435018118913463021E-03 -0.435081599403137019E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138276442783731991E-03 -0.140239752131608009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.258538650317957017E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435040431700919026E-03 -0.435058330932033995E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138909454179009012E-03 -0.139606740736330012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997274074339978E-01 -0.161423315341556987E-03 -0.161424177034354995E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435070727065607990E-03 -0.435068086477601978E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256925653634595998E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.430785237193071976E-03 -0.439488203700820024E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.138857649137926994E-03 -0.139658545777412004E-03 0.000000000000000000E+00 0.319726623957349021E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161422835104515990E-03 -0.161424657271395992E-03 -0.427144778551284004E-03 -0.443167299433834994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 
0.000000000000000000E+00 0.256448637616590001E-01 0.000000000000000000E+00 -0.324936573418277007E-03 -0.544311326562666028E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138671855069575001E-03 -0.139844339845764999E-03 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421522789439464016E-01 0.000000000000000000E+00 -0.438342696270233999E-03 -0.431915478441594015E-03 -0.666666666666666970E-02 0.000000000000000000E+00 0.403316551935295006E-04 -0.713262909582468036E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.250271632275507992E-01 0.000000000000000000E+00 0.157694230048913999E-04 -0.703450845393883040E-03 -0.666666666666666970E-02 -0.143088258477573990E-03 -0.135427936437765008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.321536300563562971E-01 0.000000000000000000E+00 0.469561289675323007E-04 -0.734637551356524029E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.337097960055970989E-03 -0.529554555471797037E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.251453403201698997E-01 -0.161349265278626000E-03 -0.161498227097286009E-03 -0.666666666666666970E-02 -0.239135715409318991E-03 -0.630652995284446973E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.312158150516623004E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161424178244695010E-03 -0.161423314131216999E-03 
-0.161423314059395002E-03 -0.161424178316517007E-03 -0.161423748120379993E-03 -0.161423744255532992E-03 -0.166666666666667011E-01 -0.435052937193477996E-03 -0.435086217679552987E-03 -0.138285143313543007E-03 -0.140231051601796993E-03 -0.435084982345309993E-03 -0.435054144804906983E-03 0.000000000000000000E+00 0.420929687910095995E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435023352535621991E-03 -0.435076141203039014E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286660715920991E-03 -0.140229534199418006E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256871015736835004E-01 0.000000000000000000E+00 -0.435049188847810018E-03 -0.435049198755422021E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258077328959006E-03 -0.139258117586380995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.252598187185257006E-01 0.000000000000000000E+00 -0.161423313380501998E-03 -0.161424178995410011E-03 -0.666666666666666970E-02 -0.435070190010078980E-03 -0.435068611700095025E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320929801235649015E-01 -0.435014945836726004E-03 -0.435084907682568021E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286694923928999E-03 -0.140229499991409998E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997361127142999E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423315570077993E-03 -0.161424176805833989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069372832834977E-03 -0.435069411726189985E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296373133570986998E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435042919779554996E-03 -0.435055736280175014E-03 -0.435084883176088981E-03 -0.435014969337498025E-03 -0.435014925977329975E-03 -0.435084928391770982E-03 -0.666666666666666970E-02 -0.138910376259480993E-03 -0.139605818655859007E-03 -0.138287928869166004E-03 -0.140228266046172993E-03 0.000000000000000000E+00 -0.138286695535614994E-03 -0.140229499379725006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.419834073423381998E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305232433000E-03 -0.161424187143480012E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069384652433987E-03 -0.435069399640343013E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256582249639075999E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980660603881994E-03 -0.435120659531148981E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286858193205001E-03 -0.140229336722134999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256868354321434997E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980641366321990E-03 -0.435120678040121982E-03 -0.666666666666666970E-02 -0.140229336893623989E-03 -0.138286858021716011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255642318718434999E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423743718790006E-03 -0.161423748657122003E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.435032845221742983E-03 -0.435106762281529014E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 0.320996545778364992E-01 0.000000000000000000E+00 -0.161423313381549988E-03 -0.161424178994361994E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384536214995E-03 -0.435069399759127010E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.256786742296024988E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423743714627998E-03 -0.161423748661284011E-03 0.000000000000000000E+00 -0.435032854625325984E-03 -0.435106752665999992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996544124479005E-01 -0.161423313377080012E-03 -0.161424178998831997E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069378322735991E-03 -0.435069406112619992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614590292301992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435016112049723975E-03 -0.435083691563065015E-03 -0.166666666666667011E-01 -0.138320998722293008E-03 -0.140195196193045990E-03 0.000000000000000000E+00 0.319837098823798002E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.435069384789494977E-03 -0.435069399500138975E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256582249673321001E-01 0.000000000000000000E+00 -0.434980660337917985E-03 -0.435120659808483994E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138286858200591995E-03 -0.140229336714748005E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419834073415166001E-01 
0.000000000000000000E+00 -0.161423305232409988E-03 -0.161424187143501994E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069382174439019E-03 -0.435069402174196023E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253691078098052004E-01 0.000000000000000000E+00 -0.434980641344910027E-03 -0.435120678062418979E-03 -0.666666666666666970E-02 -0.140229336715104002E-03 -0.138286858200234995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837098823800986E-01 0.000000000000000000E+00 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384776443026E-03 -0.435069399513485016E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.253136149492917996E-01 -0.435016112049779974E-03 -0.435083691563007010E-03 -0.666666666666666970E-02 -0.138320998722294011E-03 -0.140195196193044987E-03 -0.166666666666667011E-01 0.312157552277101992E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161424178824619004E-03 -0.161423313551293005E-03 -0.161423313542176003E-03 -0.161424178833736006E-03 -0.161423748436264003E-03 -0.161423743939648006E-03 -0.166666666666667011E-01 -0.435065044970892999E-03 -0.435073837108244986E-03 -0.138284790665693988E-03 -0.140231404249645009E-03 -0.435089215417341023E-03 -0.435050005423793986E-03 0.000000000000000000E+00 0.420929694350214018E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435016885622778980E-03 -0.435082884886724001E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286670421676992E-03 -0.140229524493662005E-03 
-0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.254915570467122987E-01 0.000000000000000000E+00 -0.161423360047314990E-03 -0.161424132328596992E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435068591366232014E-03 -0.435070210801716977E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.249645634044841011E-01 0.000000000000000000E+00 -0.161423119061156986E-03 -0.161424373314754995E-03 -0.666666666666666970E-02 -0.139258118141762012E-03 -0.139258076773577012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319795589138204966E-01 -0.161423181597302004E-03 -0.161424310778610005E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435063329379546009E-03 -0.435075591995543994E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322057822807787983E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435022004975030990E-03 -0.435077546786745027E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138280140954257988E-03 -0.140236053961082012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.286987467725142997E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423485461702009E-03 -0.161424006914210000E-03 -0.161423743701962999E-03 -0.161423748673950013E-03 -0.161423485478031991E-03 -0.161424006897879991E-03 -0.666666666666666970E-02 -0.434818199339674985E-03 -0.435326186862402978E-03 -0.138107939681827004E-03 -0.140408255233511993E-03 0.000000000000000000E+00 -0.434750487935355013E-03 -0.435395405802110002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.419834078568206009E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305246576987E-03 -0.161424187129334995E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069368742756007E-03 -0.435069415908644010E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256580957796501002E-01 0.000000000000000000E+00 -0.434980863637934997E-03 -0.435120447850457018E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286593339164988E-03 -0.140229601576174009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.256868273049125004E-01 0.000000000000000000E+00 -0.434980612246946025E-03 -0.435120708410804025E-03 -0.666666666666666970E-02 -0.140229353764145001E-03 -0.138286841151193996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256611955308473985E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435106719725676021E-03 -0.434994029732162983E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138320118013872997E-03 -0.140196076901467003E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320999233193210026E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423320520808004E-03 -0.161424171855104005E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435059778147493012E-03 -0.435079222540740013E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.257716863609747000E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435022363974353014E-03 
-0.435077172536910996E-03 0.000000000000000000E+00 -0.138312498857105987E-03 -0.140203696058234013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996765524547006E-01 0.000000000000000000E+00 -0.161423313971483987E-03 -0.161424178404427995E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435068773858159996E-03 -0.435070024197671990E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255013626644550011E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161778513994175003E-03 -0.161068978381738009E-03 -0.166666666666667011E-01 -0.430721544928323017E-03 -0.439524018875910982E-03 0.000000000000000000E+00 0.319967451293014030E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423540387061002E-03 -0.161423951988851007E-03 -0.436779134539225992E-03 -0.433396918749564988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257463974004799000E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.357123790381716987E-03 -0.513334443915504997E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138849213053176987E-03 -0.139666981862162010E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421600707339388966E-01 0.000000000000000000E+00 -0.438162222473089020E-03 -0.432071491834290977E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.110310967384949997E-03 -0.562620287003987962E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.250247683829405015E-01 0.000000000000000000E+00 0.312489112000452975E-04 
-0.718930333589037039E-03 -0.666666666666666970E-02 -0.143948366628467004E-03 -0.134567828286871993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.321784029807069966E-01 0.000000000000000000E+00 -0.131393333165154990E-03 -0.556288089223836053E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.344952960576817022E-03 -0.526365968885628946E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252170877020880015E-01 -0.161423598610962993E-03 -0.161423893764948989E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435940683620558982E-03 -0.434217221098728986E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.312157364394618012E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423748574843987E-03 -0.161423743801067995E-03 -0.161423313378308007E-03 -0.161424178997604002E-03 -0.161424178997597009E-03 -0.161423313378315000E-03 -0.166666666666667011E-01 -0.435069384129798016E-03 -0.435069400174701974E-03 -0.435069377183192995E-03 -0.435069407277841015E-03 -0.138284694479560007E-03 -0.140231500435778991E-03 0.000000000000000000E+00 0.420929781177558013E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014927902171013E-03 -0.435084926384880994E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286690846829989E-03 -0.140229504068509008E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.254915410590286010E-01 
0.000000000000000000E+00 -0.161423359665572013E-03 -0.161424132710339996E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435069213980331018E-03 -0.435069574158249974E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.249645180764926991E-01 0.000000000000000000E+00 -0.161423119329893002E-03 -0.161424373046020010E-03 -0.666666666666666970E-02 -0.139258102070886994E-03 -0.139258092844452003E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319784847376137008E-01 -0.161423139773135993E-03 -0.161424352602775989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435059186696719997E-03 -0.435079828747282984E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322084822393016029E-01 0.000000000000000000E+00 -0.435016615284991993E-03 -0.435083166852704025E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138285779894368007E-03 -0.140230415020971993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.286738419021206996E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423325593204012E-03 -0.161424166782707997E-03 -0.161423747542489989E-03 -0.161423744833421993E-03 -0.161423325604668989E-03 -0.161424166771242993E-03 -0.666666666666666970E-02 -0.435017582368835013E-03 -0.435122368257235984E-03 -0.138274982048601996E-03 -0.140241212866737001E-03 -0.435000864585250997E-03 -0.435139462305625011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.419834075447891031E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305237978992E-03 -0.161424187137933992E-03 -0.666666666666666970E-02 
-0.435069372007444006E-03 -0.435069412570366005E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256581825663056012E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980761705325996E-03 -0.435120554118331997E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286771391029988E-03 -0.140229423524309009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256868309172346015E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980606570965994E-03 -0.435120714326099026E-03 -0.666666666666666970E-02 -0.140229346218551998E-03 -0.138286848696788002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256614027694726000E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435029702345694975E-03 -0.435069519599524978E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138320910572908000E-03 -0.140195284342430998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997009695584007E-01 0.000000000000000000E+00 -0.161423314617753998E-03 -0.161424177758158011E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435066266755664011E-03 -0.435072587790152974E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257749746704468014E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435018010012214000E-03 -0.435081712497056015E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138319195561076991E-03 -0.140196999354262006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996637608070984E-01 
-0.161423313627734002E-03 -0.161424178748178007E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069045666087995E-03 -0.435069746265178998E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255649837723719991E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423599069663996E-03 -0.161423893306248013E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.435941020866372994E-03 -0.434216890957876992E-03 0.000000000000000000E+00 0.321778215180335975E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.133203886290680012E-03 -0.554477536098311953E-03 -0.310287644885220981E-03 -0.560775130597174036E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.258308431809457992E-01 0.000000000000000000E+00 -0.200815543944558010E-03 -0.626132470433443992E-03 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.446184483319574977E-03 -0.411111793506487000E-03 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421914017926215967E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435150874851761001E-03 -0.434951664732459008E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138419555564682994E-03 -0.140096639350656004E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.251983574296692006E-01 0.000000000000000000E+00 -0.874932041930874000E-05 -0.678932101969682953E-03 -0.666666666666666970E-02 -0.139012530212417987E-03 -0.139503664702921010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319960855039493966E-01 0.000000000000000000E+00 -0.161423534312431000E-03 -0.161423958063482012E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 
-0.422822405468364991E-03 -0.447571581638852981E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.251635090367436989E-01 -0.161499426491679002E-03 -0.161348065884233007E-03 -0.666666666666666970E-02 -0.431483288576659993E-03 -0.438741647262608995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.312157284528267985E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161424179076904013E-03 -0.161423313299007996E-03 -0.161423313309436991E-03 -0.161424179066474991E-03 -0.161423748561558987E-03 -0.161423743814352995E-03 -0.166666666666667011E-01 -0.435069198160920999E-03 -0.435069590334165020E-03 -0.138284743432991999E-03 -0.140231451482346998E-03 -0.435089075923097014E-03 -0.435050142023235000E-03 0.000000000000000000E+00 0.420929803448919018E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014018174735974E-03 -0.435085875041650019E-03 -0.666666666666666970E-02 -0.138286696532327987E-03 -0.140229498383011010E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256871018283196008E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049188665910004E-03 -0.435049198945112027E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258096875932990E-03 -0.139258098039406008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.252598186653136998E-01 0.000000000000000000E+00 -0.161423313379014988E-03 -0.161424178996896994E-03 -0.666666666666666970E-02 -0.435069414335974010E-03 -0.435069370280600999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320929801358365019E-01 -0.435014929695106008E-03 -0.435084924514935985E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286694969620992E-03 -0.140229499945718005E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320996579150382988E-01 0.000000000000000000E+00 -0.161423313471394000E-03 -0.161424178904518009E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069382051928022E-03 -0.435069402299393022E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.295485029250618994E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435016114097582006E-03 -0.435083689427570000E-03 -0.435055716891851981E-03 -0.435042938371768973E-03 -0.435014924444715012E-03 -0.435084929989965974E-03 -0.666666666666666970E-02 -0.138321048329605000E-03 -0.140195146585733998E-03 -0.138900264540687006E-03 -0.139615930374651991E-03 -0.138286695606836994E-03 -0.140229499308502004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.420929804454360026E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014924421165002E-03 -0.435084930014524021E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286695611057007E-03 -0.140229499304281991E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 
0.000000000000000000E+00 0.257458287849329989E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049188237201015E-03 -0.435049199392181004E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258097270754996E-03 -0.139258097644584002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255775463582341989E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423313378621992E-03 -0.161424178997289990E-03 -0.666666666666666970E-02 -0.435069399485626008E-03 -0.435069384803689026E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257404831787951989E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435042938371768973E-03 -0.435055716891853011E-03 0.000000000000000000E+00 -0.138910376033074995E-03 -0.139605818882264002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997361628787004E-01 0.000000000000000000E+00 -0.161423315571431998E-03 -0.161424176804480011E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384827771000E-03 -0.435069399460985995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256633780151170991E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313378029990E-03 -0.161424178997881992E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.435069376688627994E-03 -0.435069407783549012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322089281648140976E-01 -0.435014924425583993E-03 -0.435084930009956982E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286694981505013E-03 -0.140229499933834011E-03 -0.166666666666667011E-01 0.333333333333332982E-01 
-0.166666666666667011E-01 -0.166666666666667011E-01 0.256614590292298002E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435016112049637998E-03 -0.435083691563155003E-03 -0.166666666666667011E-01 -0.138320998722293008E-03 -0.140195196193045990E-03 0.000000000000000000E+00 0.319837098823795019E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.435069384800439022E-03 -0.435069399488948003E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256582249675047017E-01 0.000000000000000000E+00 -0.434980660275586010E-03 -0.435120659873482023E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138286858201177004E-03 -0.140229336714161993E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419834073409681013E-01 0.000000000000000000E+00 -0.161423305232394999E-03 -0.161424187143517010E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069384650815002E-03 -0.435069399641999023E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253691078101502994E-01 0.000000000000000000E+00 -0.434980641228884999E-03 -0.435120678183400979E-03 -0.666666666666666970E-02 -0.140229336714165002E-03 -0.138286858201173995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837098823795019E-01 0.000000000000000000E+00 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384800325018E-03 -0.435069399489064988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.253136149492911994E-01 -0.435016112049640979E-03 -0.435083691563150992E-03 
-0.666666666666666970E-02 -0.138320998722293008E-03 -0.140195196193045990E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.312154838512909011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423744465432012E-03 -0.161423747910479997E-03 -0.161423311121030010E-03 -0.161424181254881999E-03 -0.161424181258488001E-03 -0.161423311117424008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.435082183658245017E-03 -0.435056882388395007E-03 -0.435077576153311009E-03 -0.435061388362830001E-03 -0.138286177937990999E-03 -0.140230016977347998E-03 0.000000000000000000E+00 0.420359769758308008E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434988625092396007E-03 -0.435112371917261017E-03 -0.666666666666666970E-02 -0.138123103593930992E-03 -0.140393091321408005E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.254927196052617995E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423391697327001E-03 -0.161424100678585008E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.429493592762821984E-03 -0.440767063383173984E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.251692612175913009E-01 0.000000000000000000E+00 -0.161423746187955991E-03 -0.161423746187955991E-03 -0.666666666666666970E-02 -0.440899213583507998E-03 -0.429390539341747025E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.318962392046409984E-01 -0.363404809254292002E-03 -0.507297969422810992E-03 
-0.666666666666666970E-02 -0.666666666666666970E-02 -0.135240574414478994E-03 -0.143275620500860003E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.322093262366704020E-01 0.000000000000000000E+00 -0.435187611870158021E-03 -0.434916457449104998E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138287300795975000E-03 -0.140228894119363997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.286700106542116008E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423291382795011E-03 -0.161424200993116998E-03 -0.161423748962132008E-03 -0.161423743413780001E-03 -0.161423291392504990E-03 -0.161424200983406992E-03 -0.666666666666666970E-02 -0.435081950459870019E-03 -0.435057110654062009E-03 -0.138300165059673992E-03 -0.140216029855665006E-03 -0.435072720707763974E-03 -0.435066136833905980E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.419834073815130021E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305233497009E-03 -0.161424187142415000E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069380325788988E-03 -0.435069404064516009E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256582188293747009E-01 0.000000000000000000E+00 -0.434980686274821977E-03 -0.435120632764312025E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286845679548006E-03 -0.140229349235790991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 
0.256868342262811002E-01 0.000000000000000000E+00 -0.434980627630137003E-03 -0.435120692364288997E-03 -0.666666666666666970E-02 -0.140229339372730996E-03 -0.138286855542608001E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256614658944893000E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435012511657230014E-03 -0.435087446025792977E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138321094746602009E-03 -0.140195100168737991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996483486796005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423313210778008E-03 -0.161424179165134001E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435068950004979999E-03 -0.435069844082024008E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257759423107052008E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435016296291419009E-03 -0.435083499433781991E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138321121808393998E-03 -0.140195073106944999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996585742336984E-01 0.000000000000000000E+00 -0.161423313488731992E-03 -0.161424178887179990E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069291046625011E-03 -0.435069495355377978E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255844281626146988E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.327328574610849998E-03 -0.542038187704011951E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.138213770503833004E-03 
-0.140302424411505993E-03 0.000000000000000000E+00 0.321601835856102983E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.946023952620347994E-04 -0.593079027126956976E-03 -0.133620312350716999E-03 -0.725519910394541970E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257239980440517012E-01 0.000000000000000000E+00 0.196968326301319991E-04 -0.707378255019124025E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.458132642260517973E-03 -0.413595834475461983E-03 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.423184714573687989E-01 0.000000000000000000E+00 -0.434738589247653001E-03 -0.435372752845944011E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138688174968105987E-03 -0.139828019947233010E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.251897402149190985E-01 0.000000000000000000E+00 -0.646688794373514043E-04 -0.623012542951639965E-03 -0.666666666666666970E-02 -0.137976511090806987E-03 -0.140539683824532011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319702861190497012E-01 0.000000000000000000E+00 -0.161422661148781000E-03 -0.161424831227131009E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.425027871300158022E-03 -0.445328608699814012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.253132418654096990E-01 -0.429884568296530988E-03 -0.440422631514232989E-03 -0.666666666666666970E-02 -0.138411741521856003E-03 -0.140104453393482994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.310595796671125010E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161430908664596994E-03 -0.161416583711314988E-03 -0.161367227947133000E-03 -0.161480264428779009E-03 -0.161480354961919997E-03 -0.161367137413992012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.438392776826464984E-03 -0.431826121382375026E-03 -0.436721410041992001E-03 -0.433458024629497019E-03 -0.139062002018278005E-03 -0.139454192897061995E-03 0.000000000000000000E+00 0.421404524821471990E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.402803578099727999E-03 -0.467868848878249025E-03 -0.666666666666666970E-02 0.000000000000000000E+00 0.227910657339421994E-03 -0.900841911728360008E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256049262714956004E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.340826630558288979E-03 -0.529260041764853008E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138562349500047995E-03 -0.139953845415291002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253040215793172013E-01 0.000000000000000000E+00 -0.246116548137240024E-03 -0.598443155917590948E-03 -0.666666666666666970E-02 -0.140073789192540987E-03 -0.138442405722798010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319703071004562994E-01 -0.161422660702825987E-03 -0.161424831673085995E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.426922177757430997E-03 -0.443395080957603976E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320998407804300029E-01 0.000000000000000000E+00 -0.161423318608057989E-03 -0.161424173767853993E-03 
-0.666666666666666970E-02 -0.666666666666666970E-02 -0.432673706188636982E-03 -0.437518364259369976E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.295486137458738987E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435073936139989976E-03 -0.435025467270096005E-03 -0.435055941094787983E-03 -0.435042723373920998E-03 -0.435014132966001985E-03 -0.435085755332848012E-03 -0.666666666666666970E-02 -0.138321121970870991E-03 -0.140195072944468007E-03 -0.138900280797982998E-03 -0.139615914117355999E-03 0.000000000000000000E+00 -0.138286771859713999E-03 -0.140229423055624998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929803055049012E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014930179625984E-03 -0.435084924009661019E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286695318050990E-03 -0.140229499597288007E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.257458286284305005E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049187797943010E-03 -0.435049199850251001E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258097008661995E-03 -0.139258097906677002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255775441079788002E-01 0.000000000000000000E+00 -0.161423313317997011E-03 -0.161424179057914998E-03 -0.666666666666666970E-02 -0.435069409759835009E-03 -0.435069374755894995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.257406249620084002E-01 
0.000000000000000000E+00 -0.666666666666666970E-02 -0.435042723384703009E-03 -0.435055941080233978E-03 0.000000000000000000E+00 -0.138910497854425013E-03 -0.139605697060914011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997355336692033E-01 0.000000000000000000E+00 -0.161423315554604991E-03 -0.161424176821307994E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069370488565992E-03 -0.435069414123279022E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.256633754959206990E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313307973996E-03 -0.161424179067938013E-03 0.000000000000000000E+00 -0.435085186467014016E-03 -0.435053945729502005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322092437021506001E-01 -0.435014144601562013E-03 -0.435085743160859985E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138287341133016000E-03 -0.140228853782322998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.254980067883627988E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161506286311511997E-03 -0.161341206064400012E-03 -0.166666666666667011E-01 -0.431364756974581974E-03 -0.438866309542754976E-03 0.000000000000000000E+00 0.319866973588324027E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423386153582989E-03 -0.161424106222328993E-03 -0.435249329951151978E-03 -0.434893408363733994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256606789218085003E-01 0.000000000000000000E+00 -0.434963065954236988E-03 -0.435139005528701976E-03 
0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138291831741704991E-03 -0.140224363173634006E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419833492947222983E-01 0.000000000000000000E+00 -0.161423303638426010E-03 -0.161424188737485999E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435072120349480976E-03 -0.435066723923213975E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253728413941171002E-01 0.000000000000000000E+00 -0.434979152196512009E-03 -0.435122229395894016E-03 -0.666666666666666970E-02 -0.140221824647693988E-03 -0.138294370267645010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319851023354644978E-01 0.000000000000000000E+00 -0.161423349940120991E-03 -0.161424142435790991E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435344407954329007E-03 -0.434800430411269015E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.251600082854132011E-01 -0.161430149009246000E-03 -0.161417343366666009E-03 -0.666666666666666970E-02 -0.432078300694966016E-03 -0.438133229980573974E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320887275245489018E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435111004034048989E-03 -0.434989920105145983E-03 -0.435014834120551011E-03 -0.435085024172310027E-03 -0.435084968865059006E-03 -0.435014887158174024E-03 
-0.166666666666667011E-01 -0.138286802031117000E-03 -0.140229392884223000E-03 -0.138288034558563993E-03 -0.140228160356775005E-03 -0.138900326717328012E-03 -0.139615868198011988E-03 0.000000000000000000E+00 0.419834068202609018E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305219030012E-03 -0.161424187156881997E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435073349340887991E-03 -0.435065522020417015E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.256004806855422984E-01 0.000000000000000000E+00 -0.434981132067385973E-03 -0.435120167645717019E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138288861702206010E-03 -0.140227333213132987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253696619275626985E-01 0.000000000000000000E+00 -0.434980119724182024E-03 -0.435121221827560020E-03 -0.666666666666666970E-02 -0.140228210971901000E-03 -0.138287983943439000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319694542067313001E-01 -0.161422588303278990E-03 -0.161424904072633994E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435109649626736015E-03 -0.435030036925322975E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320971379137156981E-01 0.000000000000000000E+00 -0.161423238729728995E-03 -0.161424253646182987E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435144484966556002E-03 -0.434995958682276988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296734541379750985E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434149030098187011E-03 -0.435987412204495999E-03 -0.435031452984650986E-03 
-0.435067691864950016E-03 -0.434950548419927988E-03 -0.435152046333653016E-03 -0.666666666666666970E-02 -0.138392945863299996E-03 -0.140123249052040005E-03 -0.138921815209089002E-03 -0.139594379706249995E-03 0.000000000000000000E+00 -0.138356769775533993E-03 -0.140159425139805004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420931571567067994E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014671747375992E-03 -0.435085193476965012E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138287057706326004E-03 -0.140229137209012993E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.257455695212117017E-01 0.000000000000000000E+00 -0.435047022783618996E-03 -0.435051457582689993E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258438339454998E-03 -0.139257756575883999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255770773094191987E-01 0.000000000000000000E+00 -0.161423300794449003E-03 -0.161424191581463006E-03 -0.666666666666666970E-02 -0.435056164326672024E-03 -0.435082918027994002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.257823402544687004E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435067703423035017E-03 -0.435031441124867973E-03 0.000000000000000000E+00 -0.138940670377310000E-03 -0.139575524538028997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320998367437754992E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423318157696995E-03 -0.161424174218214987E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 
-0.435046243704917976E-03 -0.435093061865261994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.256631852001646017E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423307528138997E-03 -0.161424184847773012E-03 0.000000000000000000E+00 -0.438796404237050007E-03 -0.431424781290872027E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.323103002924175972E-01 0.000000000000000000E+00 -0.434856382728898011E-03 -0.435250190151687997E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138421511330616989E-03 -0.140094683584722008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255452807930411988E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423207454343989E-03 -0.161424284921568996E-03 -0.166666666666667011E-01 -0.404324480174985020E-03 -0.466398849792140982E-03 0.000000000000000000E+00 0.318949373068843009E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.323838290351469984E-03 -0.544927583290811006E-03 -0.135274543056306012E-03 -0.143241651859033012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255622073519536988E-01 0.000000000000000000E+00 -0.114452729937741999E-03 -0.573228692451250047E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.218484783290129011E-03 -0.891416037679068000E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421663414565641992E-01 0.000000000000000000E+00 0.687960644515914959E-05 -0.694561028834151002E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.438096223299876001E-03 
-0.432167718169455990E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255893861003781004E-01 0.000000000000000000E+00 -0.330656818749315009E-03 -0.535638411162091960E-03 -0.666666666666666970E-02 -0.139701772394438010E-03 -0.138814422520901990E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319824507944573982E-01 0.000000000000000000E+00 -0.161423276715586998E-03 -0.161424215660325011E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.453058027683075014E-03 -0.417478867706813021E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.252771264600550986E-01 -0.434642415203527976E-03 -0.435473406290417023E-03 -0.666666666666666970E-02 -0.142683907585935987E-03 -0.135832287329404013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.323033163369174023E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435248914259523019E-03 -0.434857618105281016E-03 -0.434883815526439026E-03 -0.435221605059342973E-03 -0.435144012463656023E-03 -0.434958245914109976E-03 -0.166666666666667011E-01 -0.138392691801889008E-03 -0.140123503113449990E-03 -0.138936896109547012E-03 -0.139579298805792012E-03 -0.138396653973258001E-03 -0.140119540942080996E-03 0.000000000000000000E+00 0.419134531555254014E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.160003730040957000E-03 -0.162843762334955009E-03 -0.666666666666666970E-02 0.000000000000000000E+00 
-0.439266035090063022E-03 -0.430980363867674026E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.256392673957523999E-01 0.000000000000000000E+00 -0.434777562142257996E-03 -0.435332391625641989E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138409864927514006E-03 -0.140106329987824992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.254101279172769014E-01 0.000000000000000000E+00 -0.373644623806981997E-03 -0.492026351519330037E-03 -0.666666666666666970E-02 -0.908412435934918957E-03 0.235481181545980998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.321268372004474001E-01 -0.200001267706564000E-03 -0.627372315602944992E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.127171097759715991E-03 -0.545760156629222998E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320995602660988991E-01 0.000000000000000000E+00 -0.161423310639775001E-03 -0.161424181736137008E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.437288207604906009E-03 -0.432899576991382993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296377168165362984E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434989920176705983E-03 -0.435111003938864983E-03 -0.435084786849818019E-03 -0.435015061692732000E-03 -0.435015088357717974E-03 -0.435084759043768022E-03 -0.666666666666666970E-02 -0.138910479291025010E-03 -0.139605715624313987E-03 -0.138288220944727007E-03 -0.140227973970611991E-03 -0.138286985838064012E-03 -0.140229209077275013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 
-0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.419834073428042021E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305232445008E-03 -0.161424187143467001E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069384775871976E-03 -0.435069399514123015E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256582244991315003E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980660285921980E-03 -0.435120659862830007E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286857240759006E-03 -0.140229337674580994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256868355803533989E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980641844436011E-03 -0.435120677541527977E-03 -0.666666666666666970E-02 -0.140229336589824008E-03 -0.138286858325514989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255642317604596984E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423743725482000E-03 -0.161423748650431012E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.435032785443368025E-03 -0.435106823406514024E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996548652853000E-01 -0.161423313389232997E-03 -0.161424178986679012E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069349700356026E-03 -0.435069435379967977E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256786747395171995E-01 0.000000000000000000E+00 
-0.666666666666666970E-02 -0.161423743712989009E-03 -0.161423748662923000E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.435032864937970982E-03 -0.435106742120615006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996543404327983E-01 -0.161423313375148994E-03 -0.161424179000762988E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069389550788983E-03 -0.435069394631554978E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256124534411805993E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.422563580132133995E-03 -0.447998624216019981E-03 -0.166666666666667011E-01 -0.138286150728846990E-03 -0.140230044186492007E-03 0.000000000000000000E+00 0.319552376669786989E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161419817025352993E-03 -0.161427675350558989E-03 -0.434132277678501012E-03 -0.436028351482909977E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256464487088938001E-01 0.000000000000000000E+00 -0.434899896421036018E-03 -0.435204881377557976E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138261308742496992E-03 -0.140254886172842006E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419830852158453033E-01 0.000000000000000000E+00 -0.161423296248439004E-03 -0.161424196127473005E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435080307367357005E-03 -0.435058717463834973E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253594622355668986E-01 0.000000000000000000E+00 -0.434950331860284995E-03 -0.435152283722859020E-03 -0.666666666666666970E-02 -0.140249996876446007E-03 -0.138266198038893993E-03 
-0.166666666666667011E-01 0.000000000000000000E+00 0.319571786575690020E-01 0.000000000000000000E+00 -0.161420475113086009E-03 -0.161427017262827003E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.434308830151222019E-03 -0.435847642640102974E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.252660555352819012E-01 -0.423620518084152984E-03 -0.446908164705096009E-03 -0.666666666666666970E-02 -0.138290733115794992E-03 -0.140225461799544005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320831749900574031E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.434897829232994994E-03 -0.435207041701621983E-03 -0.435182072257829001E-03 -0.434921773469817985E-03 -0.434970483470178027E-03 -0.435131276171263001E-03 -0.166666666666667011E-01 -0.138283460799556990E-03 -0.140232734115782007E-03 -0.138290545167624998E-03 -0.140225649747715002E-03 -0.138898900526048002E-03 -0.139617294389290995E-03 0.000000000000000000E+00 0.421628681453787013E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.397188434436698007E-04 -0.727400265832661014E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435230978617207986E-03 -0.434916855000890001E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.256973883757283007E-01 0.000000000000000000E+00 -0.435443117881719019E-03 -0.434671353469976026E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138672805722981989E-03 
-0.139843389192358011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253811407347032010E-01 0.000000000000000000E+00 -0.111660189923879004E-03 -0.576021232465113043E-03 -0.666666666666666970E-02 -0.807402873679339953E-03 0.113200250901337992E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.318951479564182974E-01 -0.334801618998244014E-03 -0.534674353093398985E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.135236534922172994E-03 -0.143279659993166003E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320991543625473996E-01 0.000000000000000000E+00 -0.161423299320882996E-03 -0.161424193055029988E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.439089523234416986E-03 -0.431138190963970024E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.292550790147567998E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423735549563995E-03 -0.161423756826347987E-03 -0.161424083758227992E-03 -0.161423408617683990E-03 -0.161423408591030993E-03 -0.161424083784880988E-03 -0.666666666666666970E-02 -0.434939106055095008E-03 -0.435202598868165004E-03 -0.434989974486818000E-03 -0.435150588304691020E-03 -0.434898044278314978E-03 -0.435244580342165975E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929736779580982E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014942481458009E-03 -0.435084911182310021E-03 -0.666666666666666970E-02 -0.138286681738491990E-03 -0.140229513176847007E-03 
-0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255502900122650013E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423360190818011E-03 -0.161424132185093998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435068884552577983E-03 -0.435069911008902977E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.252824734873780994E-01 0.000000000000000000E+00 -0.161423118220565989E-03 -0.161424374155345993E-03 -0.666666666666666970E-02 -0.139258110569624000E-03 -0.139258084345714997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255485447377079983E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423302756024003E-03 -0.161424189619888006E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.434872688531394978E-03 -0.435270525342987020E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322052497845927016E-01 -0.435023979094238992E-03 -0.435075488205511978E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138279010834734011E-03 -0.140237184080605013E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.258530259445254006E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.434958189051660006E-03 -0.435144092590821993E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138909064929736007E-03 -0.139607129985603993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320994819723091004E-01 -0.161423308725767013E-03 -0.161424183650144996E-03 
-0.666666666666666970E-02 -0.666666666666666970E-02 -0.435080068461045984E-03 -0.435058950989111021E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614731138163008E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.429527300766954008E-03 -0.440793399874086025E-03 -0.166666666666667011E-01 -0.138412525549365991E-03 -0.140103669365973006E-03 0.000000000000000000E+00 0.319693244854589026E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161422576318977012E-03 -0.161424916056934997E-03 -0.437741651795111979E-03 -0.432457178334932999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257682962742948989E-01 0.000000000000000000E+00 -0.372135275695876014E-03 -0.498733752802202964E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138908713201793988E-03 -0.139607481713545009E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.418896828694112003E-01 0.000000000000000000E+00 -0.436533664253622989E-03 -0.433624016341012007E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.134626819469842998E-03 -0.143889375445495999E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.252833740154079986E-01 0.000000000000000000E+00 0.378159457418904982E-04 -0.725497368130881976E-03 -0.666666666666666970E-02 -0.597713125202172023E-03 -0.752181291867666958E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.321707087590768978E-01 0.000000000000000000E+00 -0.981956570690248044E-04 -0.589485765319966958E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.242422521139187011E-03 -0.626419769058526952E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 
-0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252392538739079006E-01 -0.333702074783931012E-03 -0.536013663819399954E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138219319560300990E-03 -0.140296875355038007E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.310595586219111000E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161431110154876994E-03 -0.161416382221035991E-03 -0.161367182761019008E-03 -0.161480309614893001E-03 -0.161480399925954988E-03 -0.161367092449956993E-03 -0.166666666666667011E-01 -0.438417298132810025E-03 -0.431802193377014986E-03 -0.436709554426625997E-03 -0.433469595476341000E-03 -0.139063297967854010E-03 -0.139452896947484987E-03 0.000000000000000000E+00 0.421404520292116974E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.402798477196058982E-03 -0.467873676471954026E-03 -0.666666666666666970E-02 0.000000000000000000E+00 0.227909367425477999E-03 -0.900840621814416988E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.256044922875989997E-01 0.000000000000000000E+00 -0.340827301030340014E-03 -0.529259221222554946E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138561533653603990E-03 -0.139954661261736010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253040211801738996E-01 0.000000000000000000E+00 -0.246116211180549989E-03 -0.598443499755276051E-03 -0.666666666666666970E-02 -0.140073790975437003E-03 -0.138442403939901995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 
0.319703070963996000E-01 -0.161422660702506988E-03 -0.161424831673404994E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.426922132602362023E-03 -0.443395127058409007E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.321002728756756978E-01 0.000000000000000000E+00 -0.161423329712289998E-03 -0.161424162663622011E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.432632949803903998E-03 -0.437559988660470999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296364413425855011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435207087979181024E-03 -0.434897780856480991E-03 -0.435078144333426976E-03 -0.435021431652866995E-03 -0.435027766934605010E-03 -0.435071537880043005E-03 -0.666666666666666970E-02 -0.138910179724992006E-03 -0.139606015190346991E-03 -0.138289402276009992E-03 -0.140226792639329005E-03 -0.138286064088546994E-03 -0.140230130826792004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.419834145316453980E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305428603012E-03 -0.161424186947310000E-03 -0.666666666666666970E-02 -0.435068797728623978E-03 -0.435069999793927990E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256581977277514015E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434982640822920976E-03 -0.435118594642042004E-03 
0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286799402304007E-03 -0.140229395513034991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256861835048418988E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980941091875014E-03 -0.435120365734433022E-03 -0.666666666666666970E-02 -0.140230675700951006E-03 -0.138285519214387991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255648151652009999E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423744258310013E-03 -0.161423748117601996E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.434971272637178004E-03 -0.435169719904374022E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996755017593990E-01 -0.161423313937937010E-03 -0.161424178437974999E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435067422348721020E-03 -0.435071406160561979E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256787278728396011E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423752315616002E-03 -0.161423740060296007E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.434863912658681987E-03 -0.435279496392151021E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.321000539265607968E-01 -0.161423323758083008E-03 -0.161424168617829001E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435021975200768020E-03 -0.435117876636681000E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.254980067884808988E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161506286650737010E-03 -0.161341205725174999E-03 
-0.166666666666667011E-01 -0.431364756994681022E-03 -0.438866309522160990E-03 0.000000000000000000E+00 0.319866973712260028E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423386153855991E-03 -0.161424106222056993E-03 -0.435249227300768993E-03 -0.434893508757877025E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256606773361423013E-01 0.000000000000000000E+00 -0.434963273622003977E-03 -0.435138788986911998E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138291826977172987E-03 -0.140224367938166010E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419833531788693995E-01 0.000000000000000000E+00 -0.161423303747212010E-03 -0.161424188628699999E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435047841600877025E-03 -0.435091428270091996E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253728372267130013E-01 0.000000000000000000E+00 -0.434980109299934025E-03 -0.435121231414349987E-03 -0.666666666666666970E-02 -0.140221834450205004E-03 -0.138294360465133993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319851023684395025E-01 0.000000000000000000E+00 -0.161423349940937992E-03 -0.161424142434973990E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435344118977629016E-03 -0.434800713024421025E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.251600082863014003E-01 -0.161430149161919989E-03 -0.161417343213992996E-03 -0.666666666666666970E-02 -0.432078300658010010E-03 -0.438133230018339012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320880852120032020E-01 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435006978817776007E-03 -0.435093216238015019E-03 -0.435023227600191012E-03 -0.435076271502717992E-03 -0.435080703092234026E-03 -0.435018977896097989E-03 -0.166666666666667011E-01 -0.138286342284362987E-03 -0.140229852630976986E-03 -0.138288877468917000E-03 -0.140227317446421998E-03 -0.138898309276879996E-03 -0.139617885638460004E-03 0.000000000000000000E+00 0.419834046918393031E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305160284008E-03 -0.161424187215628001E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435057325047151988E-03 -0.435081731065996996E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256008015072015996E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434979053168488001E-03 -0.435122335339392023E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138289510792666997E-03 -0.140226684122673003E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253691080193319991E-01 0.000000000000000000E+00 -0.434981271968578004E-03 -0.435120020450890975E-03 -0.666666666666666970E-02 -0.140229335054007991E-03 -0.138286859861331006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837881512948022E-01 -0.161423315571830009E-03 -0.161424176804082000E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069394631856982E-03 -0.435069389550488985E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320970354177166975E-01 0.000000000000000000E+00 -0.161423235472326997E-03 
-0.161424256903585988E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435168272526173982E-03 -0.434972697148703995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.295545368095916985E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434122085949861976E-03 -0.436015593157589986E-03 -0.435101402844330994E-03 -0.434999127125352007E-03 -0.434882942033044983E-03 -0.435222550914836999E-03 -0.666666666666666970E-02 -0.138329236201211012E-03 -0.140186958714128013E-03 -0.138899088561680992E-03 -0.139617106353658006E-03 -0.138290618264299002E-03 -0.140225576651039995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420931657846550997E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014640889280020E-03 -0.435085225654339980E-03 -0.666666666666666970E-02 -0.138287075366651002E-03 -0.140229119548687995E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257458666890181993E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435045894309902018E-03 -0.435052634377398005E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258454546935994E-03 -0.139257740368403004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255770740455284994E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423300836045991E-03 -0.161424191539865991E-03 
-0.666666666666666970E-02 -0.435055528504503020E-03 -0.435083568186561014E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257826760506618992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.434999152127107014E-03 -0.435101368436342979E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138940653027276996E-03 -0.139575541888062001E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996072561336027E-01 -0.161423312108405990E-03 -0.161424180267505992E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435072662950887002E-03 -0.435066193259916003E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256629422996789008E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423300977684998E-03 -0.161424191398227011E-03 0.000000000000000000E+00 -0.438825800967892977E-03 -0.431396059158853015E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.323107970182680976E-01 -0.434855364031651011E-03 -0.435251251813800982E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138421931556495010E-03 -0.140094263358843987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255642320215948987E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423743718458999E-03 -0.161423748657454013E-03 -0.166666666666667011E-01 -0.435032852641439025E-03 -0.435106754694577981E-03 0.000000000000000000E+00 0.319837065352637992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423313381099990E-03 -0.161424178994811992E-03 -0.435069595976944991E-03 -0.435069192642484977E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.256582283981556004E-01 0.000000000000000000E+00 -0.434978918685956012E-03 -0.435122475932554027E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138286871097143010E-03 -0.140229323818195987E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419833928488543007E-01 0.000000000000000000E+00 -0.161423304820364997E-03 -0.161424187555547012E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435207587129756010E-03 -0.434934243879776989E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253691146856406986E-01 0.000000000000000000E+00 -0.434977844183998008E-03 -0.435123594722203980E-03 -0.666666666666666970E-02 -0.140229316757458007E-03 -0.138286878157881994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837065219248998E-01 0.000000000000000000E+00 -0.161423313380713987E-03 -0.161424178995198998E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069861087792979E-03 -0.435068933373936994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.252163879413985006E-01 -0.161423743718170005E-03 -0.161423748657742004E-03 -0.666666666666666970E-02 -0.435032852613000023E-03 -0.435106754723656988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320883554506867022E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435127306548502995E-03 -0.434974286158534991E-03 
-0.435023129650981017E-03 -0.435076373631457973E-03 -0.435052776118324998E-03 -0.435045758390568017E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138286581602824004E-03 -0.140229613312514993E-03 -0.138900368233470993E-03 -0.139615826681868004E-03 -0.138293378267740995E-03 -0.140222816647599005E-03 0.000000000000000000E+00 0.419834123874497020E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305373013988E-03 -0.161424187002898997E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435065741772807020E-03 -0.435073124632554995E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255992190832709000E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434982794949810992E-03 -0.435118433992394990E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286284757403011E-03 -0.140229910157936013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253696592428569005E-01 0.000000000000000000E+00 -0.434980341928489984E-03 -0.435120990165656005E-03 -0.666666666666666970E-02 -0.140228218876105011E-03 -0.138287976039234013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319694542100162002E-01 -0.161422588303611000E-03 -0.161424904072301009E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435109462002123012E-03 -0.435030220343913024E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320999013551653001E-01 0.000000000000000000E+00 -0.161423319891360013E-03 -0.161424172484551996E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435048884195216989E-03 -0.435090361870336983E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.297460083034186001E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435093184138760016E-03 -0.435007004727823009E-03 -0.435044582892759007E-03 -0.435054001407353006E-03 -0.435082620665603974E-03 -0.435017135654766987E-03 -0.666666666666666970E-02 -0.138933893426106987E-03 -0.139582301489232010E-03 -0.138357128758214001E-03 -0.140159066157124996E-03 0.000000000000000000E+00 -0.138352953476254988E-03 -0.140163241439084010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.419834091612526009E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305282162996E-03 -0.161424187093749013E-03 -0.666666666666666970E-02 -0.435069239563229014E-03 -0.435069548000012994E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256578127707293990E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434981761685145974E-03 -0.435119511480767007E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286013333851002E-03 -0.140230181581488998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256868183233514985E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980125799632985E-03 -0.435121215670877001E-03 -0.666666666666666970E-02 -0.140229372680005009E-03 -0.138286822235333988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255642631986222994E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423746696742987E-03 -0.161423745679168995E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.434938825463917979E-03 -0.435202898680430025E-03 
-0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320998066618511033E-01 -0.161423317331866001E-03 -0.161424175044046008E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435039227329621022E-03 -0.435100236286997026E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.256789254737601995E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423744043055008E-03 -0.161423748332858004E-03 0.000000000000000000E+00 -0.434994690186026999E-03 -0.435145776107332002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996673803692970E-01 -0.161423313722635004E-03 -0.161424178653277005E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435068125149740983E-03 -0.435070687523708016E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255452807933189002E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423207454355997E-03 -0.161424284921556012E-03 -0.166666666666667011E-01 -0.404324471515191976E-03 -0.466398858585693999E-03 0.000000000000000000E+00 0.318949372864687011E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.323838542624367001E-03 -0.544927348767803980E-03 -0.135274539864154011E-03 -0.143241655051185013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255622073357968001E-01 0.000000000000000000E+00 -0.114453213670021004E-03 -0.573228208718970988E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.218486799119613006E-03 -0.891418053508551047E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.421663421972207977E-01 0.000000000000000000E+00 0.686273215429911012E-05 -0.694544154543290989E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.437941742226114026E-03 -0.432315834894540991E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255893865594537008E-01 0.000000000000000000E+00 -0.330660242956903980E-03 -0.535635480866768947E-03 -0.666666666666666970E-02 -0.139701781018268989E-03 -0.138814413897070008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319824507952650994E-01 0.000000000000000000E+00 -0.161423276715632995E-03 -0.161424215660278987E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.453057648293402017E-03 -0.417479238786919984E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.252771264643981003E-01 -0.434642414870016003E-03 -0.435473406638131990E-03 -0.666666666666666970E-02 -0.142683907492472013E-03 -0.135832287422867011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320831993534842971E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435039282000798980E-03 -0.435059530140830008E-03 -0.435183177514903016E-03 -0.434920713595511989E-03 -0.434923578188304021E-03 -0.435180190251448023E-03 -0.166666666666667011E-01 -0.138283467016899007E-03 -0.140232727898440993E-03 -0.138898928565553995E-03 -0.139617266349785003E-03 -0.138290715294724011E-03 -0.140225479620615013E-03 0.000000000000000000E+00 0.421628681647200979E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.397187181120990971E-04 
-0.727400140501091022E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435231404167815000E-03 -0.434916453249015015E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256970881787325009E-01 0.000000000000000000E+00 -0.435443459120569022E-03 -0.434671026438330013E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138672556881986003E-03 -0.139843638033352994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253811403923139001E-01 0.000000000000000000E+00 -0.111659736914792001E-03 -0.576021685474199015E-03 -0.666666666666666970E-02 -0.807403253803167008E-03 0.113208142554363006E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.318951479398702983E-01 -0.334801708387352976E-03 -0.534674268837695009E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.135236533028903003E-03 -0.143279661886435995E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320995045206775992E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423308955499994E-03 -0.161424183420412991E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.439073955171491003E-03 -0.431153380462344989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.292551256260864985E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423741996618989E-03 -0.161423750379292993E-03 -0.161424083526681006E-03 -0.161423408849231003E-03 -0.161423408822156008E-03 -0.161424083553757003E-03 -0.666666666666666970E-02 -0.434870224107475974E-03 -0.435273023345194990E-03 -0.434990359763353014E-03 -0.435150194228865013E-03 -0.434886571394757027E-03 -0.435256310073468009E-03 
-0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929704392936990E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014948460666999E-03 -0.435084904947665005E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286675090450010E-03 -0.140229519824888987E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255502912848136997E-01 0.000000000000000000E+00 -0.161423360221235005E-03 -0.161424132154678007E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435068640548550009E-03 -0.435070160511034009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.252823540608277005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423118749632011E-03 -0.161424373626279998E-03 -0.666666666666666970E-02 -0.139258116844783998E-03 -0.139258078070556002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255489628695773005E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423314131588989E-03 -0.161424178244322993E-03 0.000000000000000000E+00 -0.434859092529006977E-03 -0.435284424993224019E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322050771166806013E-01 0.000000000000000000E+00 -0.435024403864370983E-03 -0.435075045265772024E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138278642730750004E-03 -0.140237552184589996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 
0.000000000000000000E+00 0.258528526278573000E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435045758287675984E-03 -0.435052776259060999E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138908581714380013E-03 -0.139607613200959987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997445770579981E-01 -0.161423315795968994E-03 -0.161424176579942987E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069043735089990E-03 -0.435069748238998001E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614731136071000E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.429527301021726991E-03 -0.440793399609727991E-03 -0.166666666666667011E-01 -0.138412525548941011E-03 -0.140103669366398013E-03 0.000000000000000000E+00 0.319693244861690984E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161422576319042010E-03 -0.161424916056869999E-03 -0.437741641876694004E-03 -0.432457188030005977E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257682960389588010E-01 0.000000000000000000E+00 -0.372135294409653991E-03 -0.498733734440172968E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138908712464486990E-03 -0.139607482450852008E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.418896816202265973E-01 0.000000000000000000E+00 -0.436537889268050986E-03 -0.433619959227105999E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.134626693225825994E-03 -0.143889501689513003E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.252833739842331998E-01 0.000000000000000000E+00 0.378158663248480029E-04 
-0.725497288713840022E-03 -0.666666666666666970E-02 -0.597713400143132971E-03 -0.752178542458052050E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.321707087558135013E-01 0.000000000000000000E+00 -0.981956616589569036E-04 -0.589485760730035007E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.242422452004102012E-03 -0.626419835077194048E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252392538736065999E-01 -0.333702076879594987E-03 -0.536013661829747045E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138219319557457995E-03 -0.140296875357881002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.321074799924774973E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.434939601065898027E-03 -0.435163477654634996E-03 -0.435157861406833973E-03 -0.434944986847371983E-03 -0.434989471025225001E-03 -0.435111473318402012E-03 -0.166666666666667011E-01 -0.138299620238519004E-03 -0.140216574676820996E-03 -0.138306111153669995E-03 -0.140210083761669002E-03 -0.138904474579713009E-03 -0.139611720335626992E-03 0.000000000000000000E+00 0.421630568906255984E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.382244495288109972E-04 -0.725905871917801953E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435871010405398983E-03 -0.434311639967858000E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256952418884623009E-01 0.000000000000000000E+00 -0.435307159704155999E-03 -0.434801765636921985E-03 0.000000000000000000E+00 
-0.666666666666666970E-02 -0.138667044406462996E-03 -0.139849150508876001E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253756621166979016E-01 0.000000000000000000E+00 -0.110334788320004006E-03 -0.577346634068988027E-03 -0.666666666666666970E-02 -0.811801782227151991E-03 0.217125293756245991E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.318985675848939970E-01 -0.333010067147420014E-03 -0.536378952719955017E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.135533186849416004E-03 -0.142983008065922994E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320998187031011004E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423317205601008E-03 -0.161424175170311001E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.439006972327073976E-03 -0.431218847172978989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.292402861508459996E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423747268413004E-03 -0.161423745107499005E-03 -0.161424199494236009E-03 -0.161423292881676000E-03 -0.161423292890866999E-03 -0.161424199485045010E-03 -0.666666666666666970E-02 -0.434862443268883999E-03 -0.435281002767785006E-03 -0.435100436115737983E-03 -0.435039032534641993E-03 0.000000000000000000E+00 -0.435036509827943995E-03 -0.435103015721981983E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929537555077005E-01 0.000000000000000000E+00 
0.000000000000000000E+00 -0.435015025801769976E-03 -0.435084824299273990E-03 -0.666666666666666970E-02 -0.138286640917195994E-03 -0.140229553998143003E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255502682711579998E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423359671854992E-03 -0.161424132704057993E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435067434800663979E-03 -0.435071393428817989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.252818652853033995E-01 0.000000000000000000E+00 -0.161423121023111999E-03 -0.161424371352800010E-03 -0.666666666666666970E-02 -0.139258148276853998E-03 -0.139258046638484999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255494975093474991E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423327915938004E-03 -0.161424164459974005E-03 0.000000000000000000E+00 -0.435032834826549976E-03 -0.435106772375089984E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322088227833026985E-01 -0.435016604539718995E-03 -0.435083178015839003E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286481481243002E-03 -0.140229713434095995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.258545382043442011E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435180234489963001E-03 -0.434923532533521007E-03 0.000000000000000000E+00 -0.138910062415604000E-03 -0.139606132499734998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.321001796355565028E-01 -0.161423327064930995E-03 -0.161424165310980987E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435025887435454999E-03 -0.435113876200826026E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255844281592568015E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.327328582483525979E-03 -0.542038180279041048E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.138213770482627013E-03 -0.140302424432712987E-03 0.000000000000000000E+00 0.321601835592413010E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.946024118762530064E-04 -0.593079010512739054E-03 -0.133620199164305990E-03 -0.725519994603599015E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257239982411411987E-01 0.196968234580236002E-04 -0.707378245847014990E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.458129631085333017E-03 -0.413598754070089024E-03 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.423184714605200005E-01 0.000000000000000000E+00 -0.434756036956663000E-03 -0.435354579774434021E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138688118698557992E-03 -0.139828076216782008E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.251897443601684992E-01 0.000000000000000000E+00 -0.646690982568611981E-04 -0.623012324132130035E-03 -0.666666666666666970E-02 -0.137976768222350989E-03 -0.140539426692988008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319702861271339983E-01 0.000000000000000000E+00 -0.161422661149470011E-03 -0.161424831226441998E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.425027766008080989E-03 
-0.445328716144572018E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.253132418625061015E-01 -0.429884571295451002E-03 -0.440422628401466976E-03 -0.666666666666666970E-02 -0.138411741515959000E-03 -0.140104453399379997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.312157365405992017E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423748574759013E-03 -0.161423743801152996E-03 -0.161423313379198001E-03 -0.161424178996714008E-03 -0.161424178996706988E-03 -0.161423313379204994E-03 -0.166666666666667011E-01 -0.435069382470465026E-03 -0.435069401871426007E-03 -0.435069376779887026E-03 -0.435069407690234008E-03 -0.138284693812444989E-03 -0.140231501102894008E-03 0.000000000000000000E+00 0.420929708077939987E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014938579204977E-03 -0.435084915251974006E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286675893133993E-03 -0.140229519022206007E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256866648616451004E-01 0.000000000000000000E+00 -0.435050846412010986E-03 -0.435047608857992002E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258078983040012E-03 -0.139258115932299013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.252598190481393009E-01 0.000000000000000000E+00 -0.161423313207681012E-03 -0.161424179168232000E-03 
-0.666666666666666970E-02 -0.435070119779517976E-03 -0.435068680382981010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320929799686702974E-01 -0.435014999741732006E-03 -0.435084851470821988E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286694533200995E-03 -0.140229500382138002E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.321001296867379007E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423325778209994E-03 -0.161424166597702991E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435030925452593018E-03 -0.435108724823487006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296363951915997008E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435163526863417986E-03 -0.434939554418941980E-03 -0.435078552482020999E-03 -0.435021040258217015E-03 -0.435027006817730023E-03 -0.435072330536212006E-03 -0.666666666666666970E-02 -0.138910218392951005E-03 -0.139605976522387993E-03 -0.138289242827388992E-03 -0.140226952087950006E-03 -0.138286026412109991E-03 -0.140230168503229007E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.419834140961602986E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305416680989E-03 -0.161424186959231996E-03 -0.666666666666666970E-02 -0.435068823290631025E-03 -0.435069973655738992E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256581988626478005E-01 0.000000000000000000E+00 
0.000000000000000000E+00 -0.434982526778275992E-03 -0.435118713563232987E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286801913291003E-03 -0.140229393002047994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256862275407307994E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980912593391981E-03 -0.435120395436144002E-03 -0.666666666666666970E-02 -0.140230584882228997E-03 -0.138285610033110000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255647395300480987E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423744222123004E-03 -0.161423748153789005E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.434974854599588009E-03 -0.435166057532792997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996742791537990E-01 0.000000000000000000E+00 -0.161423313905322989E-03 -0.161424178470588993E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435067503898425021E-03 -0.435071322773375989E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256787237707600986E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423750872133001E-03 -0.161423741503779008E-03 0.000000000000000000E+00 -0.434878052016917011E-03 -0.435265039122159018E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320999946846379988E-01 -0.161423322217426002E-03 -0.161424170158486007E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435025227850866988E-03 -0.435114550827602994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614590240232011E-01 
0.000000000000000000E+00 -0.666666666666666970E-02 -0.435016115295252983E-03 -0.435083688178646997E-03 -0.166666666666667011E-01 -0.138320998711983004E-03 -0.140195196203355993E-03 0.000000000000000000E+00 0.319837098932621994E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423313471531992E-03 -0.161424178904379990E-03 -0.435069292513270988E-03 -0.435069493855684000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256582235323178989E-01 0.000000000000000000E+00 -0.434980856857106006E-03 -0.435120454886334974E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138286853775869010E-03 -0.140229341139469987E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419834109605791983E-01 0.000000000000000000E+00 -0.161423305333356989E-03 -0.161424187042554993E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435046434997871025E-03 -0.435092866540485025E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253691040607966004E-01 0.000000000000000000E+00 -0.434981527116148989E-03 -0.435119754440041018E-03 -0.666666666666666970E-02 -0.140229345868751000E-03 -0.138286849046589000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837099126562968E-01 0.000000000000000000E+00 -0.161423313472065989E-03 -0.161424178903845993E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069123307102977E-03 -0.435069666874682976E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.253136149345605001E-01 -0.435016121073636018E-03 -0.435083682152984010E-03 -0.666666666666666970E-02 -0.138320998693124988E-03 -0.140195196222214010E-03 
-0.166666666666667011E-01 0.312157824691738014E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423746615296988E-03 -0.161423745760614994E-03 -0.161423313748345991E-03 -0.161424178627565991E-03 -0.161424178629453993E-03 -0.161423313746457989E-03 -0.166666666666667011E-01 -0.435045651504759002E-03 -0.435093667508772001E-03 -0.435080766409200008E-03 -0.435058268328494021E-03 -0.138278294452393003E-03 -0.140237900462945994E-03 0.000000000000000000E+00 0.420929613697930971E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435018968638475976E-03 -0.435080712727501978E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286650710325003E-03 -0.140229544205013995E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.256871012844454988E-01 0.000000000000000000E+00 -0.435049191356535016E-03 -0.435049196139260014E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258062358015006E-03 -0.139258132557323991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.252598188674287988E-01 0.000000000000000000E+00 -0.161423313384250004E-03 -0.161424178991662005E-03 -0.666666666666666970E-02 -0.435070776497347995E-03 -0.435068038137203020E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320929799834977977E-01 -0.435014991950716994E-03 -0.435084859595236974E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286694574832000E-03 -0.140229500340508000E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996579186692971E-01 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423313471493991E-03 -0.161424178904418994E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069357599841020E-03 -0.435069427302476014E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.295485029193944988E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435016115212415004E-03 -0.435083688265029017E-03 -0.435055716856611019E-03 -0.435042938405563989E-03 -0.435014924529298014E-03 -0.435084929901764013E-03 -0.666666666666666970E-02 -0.138321048324070988E-03 -0.140195146591268009E-03 -0.138900264540679010E-03 -0.139615930374659987E-03 -0.138286695603002008E-03 -0.140229499312336989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929804452524967E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014924421641997E-03 -0.435084930014025993E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286695610681005E-03 -0.140229499304657992E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257458287848717007E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049188240248003E-03 -0.435049199389004021E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258097270406994E-03 -0.139258097644932003E-03 -0.166666666666667011E-01 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255775463585184992E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423313378629012E-03 -0.161424178997282997E-03 -0.666666666666666970E-02 -0.435069399499345990E-03 -0.435069384790270994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257404831595632010E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435042938405561007E-03 -0.435055716856614001E-03 0.000000000000000000E+00 -0.138910376016605991E-03 -0.139605818898733006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997361629841993E-01 -0.161423315571435007E-03 -0.161424176804478005E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384822633020E-03 -0.435069399466241015E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256633780153988009E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313378038013E-03 -0.161424178997873996E-03 0.000000000000000000E+00 -0.435069374982946988E-03 -0.435069409527666015E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322089281358309970E-01 -0.435014924516908999E-03 -0.435084929914729011E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286694922169990E-03 -0.140229499993169008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614590292298002E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435016112049646021E-03 -0.435083691563146980E-03 -0.166666666666667011E-01 -0.138320998722293008E-03 -0.140195196193045990E-03 0.000000000000000000E+00 0.319837098823795019E-01 0.000000000000000000E+00 
-0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.435069384800128994E-03 -0.435069399489265024E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256582249674996987E-01 0.000000000000000000E+00 -0.434980660277106983E-03 -0.435120659871895998E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138286858201160009E-03 -0.140229336714178988E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419834073409849004E-01 0.000000000000000000E+00 -0.161423305232394999E-03 -0.161424187143517010E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069384518094995E-03 -0.435069399777710994E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253691078101383992E-01 0.000000000000000000E+00 -0.434980641232482002E-03 -0.435120678179650019E-03 -0.666666666666666970E-02 -0.140229336714196010E-03 -0.138286858201142987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837098823795990E-01 0.000000000000000000E+00 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384799506987E-03 -0.435069399489901993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.253136149492911994E-01 -0.435016112049662988E-03 -0.435083691563128007E-03 -0.666666666666666970E-02 -0.138320998722293008E-03 -0.140195196193046992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320885977646230974E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435028541278858008E-03 -0.435070730506436016E-03 -0.435014782666575002E-03 -0.435085077833358993E-03 -0.435084998915685002E-03 -0.435014858345719973E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138286722125585996E-03 -0.140229472789753001E-03 -0.138287952903743989E-03 -0.140228242011595009E-03 -0.138900254709783987E-03 -0.139615940205555010E-03 0.000000000000000000E+00 0.419834115616333006E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305349811004E-03 -0.161424187026102008E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435070335762563986E-03 -0.435068469166236015E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255991790107528011E-01 0.000000000000000000E+00 -0.434982884474892026E-03 -0.435118340650319981E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286203203484011E-03 -0.140229991711855013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253696597865739996E-01 0.000000000000000000E+00 -0.434979979753249990E-03 -0.435121367820745985E-03 -0.666666666666666970E-02 -0.140228217416730994E-03 -0.138287977498608003E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319694542079494021E-01 -0.161422588303417009E-03 -0.161424904072495000E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435109502026177015E-03 -0.435030181217087981E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997612467455035E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423316150031992E-03 -0.161424176225879990E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 
-0.435046027403482012E-03 -0.435093283082004022E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296643788713649992E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435041466166206979E-03 -0.435057251216224993E-03 -0.434987713518249003E-03 -0.435113298977872021E-03 -0.435084291440460005E-03 -0.435015533322462983E-03 -0.666666666666666970E-02 -0.138384828446226003E-03 -0.140131366469113997E-03 -0.138922740970408006E-03 -0.139593453944930992E-03 -0.138352603070340010E-03 -0.140163591844998987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929717450221966E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014955543717996E-03 -0.435084897561344988E-03 -0.666666666666666970E-02 -0.138286677768216005E-03 -0.140229517147122992E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.257455320601093010E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435050355484345984E-03 -0.435048079623292984E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258080653318999E-03 -0.139258114262021002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255775497212881002E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423313341254991E-03 -0.161424179034656991E-03 -0.666666666666666970E-02 -0.435070056149005984E-03 -0.435068742611218005E-03 -0.166666666666667011E-01 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257402390203077015E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435113345596286011E-03 -0.434987675900872992E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138910391927832998E-03 -0.139605802987505999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320999657878337022E-01 -0.161423321565590998E-03 -0.161424170810321011E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435043064748192026E-03 -0.435096312338415002E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256636236446474014E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423319781588012E-03 -0.161424172594323997E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.435040266525950976E-03 -0.435099173616122011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322086798404708011E-01 -0.435015968986542014E-03 -0.435083840783048986E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286185919272000E-03 -0.140230008996066998E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255452807944169004E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423207454397007E-03 -0.161424284921515002E-03 -0.166666666666667011E-01 -0.404324467010457988E-03 -0.466398863159433010E-03 0.000000000000000000E+00 0.318949372706568007E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.323838600968767004E-03 -0.544927294428741000E-03 -0.135274538150394994E-03 -0.143241656764945006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255622073245997013E-01 0.000000000000000000E+00 
-0.114453247275369001E-03 -0.573228175113622992E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.218487273219374002E-03 -0.891418527608312016E-03 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421663423170699977E-01 0.000000000000000000E+00 0.686068254293191984E-05 -0.694542104931923968E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.437928115910693992E-03 -0.432328899183500985E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255893867272030014E-01 0.000000000000000000E+00 -0.330661927924627975E-03 -0.535633960374462992E-03 -0.666666666666666970E-02 -0.139701786050046989E-03 -0.138814408865292008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319824508284424977E-01 0.000000000000000000E+00 -0.161423276716622004E-03 -0.161424215659290005E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.453057324015285993E-03 -0.417479555947782987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.252771264598301987E-01 -0.434642424564362999E-03 -0.435473396530201005E-03 -0.666666666666666970E-02 -0.142683907588298003E-03 -0.135832287327041998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.310595659754696989E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161480379356609011E-03 -0.161367113019302998E-03 -0.161367201132704989E-03 -0.161480291243207996E-03 -0.161431097435676003E-03 -0.161416394940237009E-03 -0.166666666666667011E-01 
-0.438433354062566980E-03 -0.431786524598485992E-03 -0.139063303639515990E-03 -0.139452891275823008E-03 -0.436729888333051012E-03 -0.433449749920077998E-03 0.000000000000000000E+00 0.421404522783956001E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.402800893973209023E-03 -0.467871394310760984E-03 -0.666666666666666970E-02 0.000000000000000000E+00 0.227909597161434006E-03 -0.900840851550373050E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256049312238144991E-01 0.000000000000000000E+00 -0.340826929960757021E-03 -0.529259757575146952E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138562359099061988E-03 -0.139953835816277009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253040222377561991E-01 0.000000000000000000E+00 -0.246116167738164992E-03 -0.598443392983163958E-03 -0.666666666666666970E-02 -0.140073786324521011E-03 -0.138442408590818013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319703070887365007E-01 -0.161422660701845001E-03 -0.161424831674067008E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.426922283598156992E-03 -0.443394972905718991E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320998325318406982E-01 0.000000000000000000E+00 -0.161423318386659000E-03 -0.161424173989254012E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.432674210328745981E-03 -0.437517849506738994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296374388638415004E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435070730494953989E-03 -0.435028541287397997E-03 
-0.435085047523665978E-03 -0.435014811728346980E-03 -0.435014733314390012E-03 -0.435085129293168984E-03 -0.666666666666666970E-02 -0.138910408020893991E-03 -0.139605786894445006E-03 -0.138288015962704006E-03 -0.140228178952634991E-03 -0.138286786003025006E-03 -0.140229408912313991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.419834073379218020E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305232311000E-03 -0.161424187143602012E-03 -0.666666666666666970E-02 -0.435069384517251974E-03 -0.435069399778572989E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256582252366890015E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980659785321009E-03 -0.435120660384639003E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286858754340991E-03 -0.140229336160998006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256868357246730003E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980640625951985E-03 -0.435120678812035981E-03 -0.666666666666666970E-02 -0.140229336292177010E-03 -0.138286858623161988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255642315719874988E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423743707386991E-03 -0.161423748668524991E-03 0.000000000000000000E+00 -0.435032940295220990E-03 -0.435106665066769985E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996541101558966E-01 -0.161423313368980995E-03 -0.161424179006930987E-03 -0.666666666666666970E-02 
-0.666666666666666970E-02 -0.435069405363546012E-03 -0.435069379055302017E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256786736305051992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423743710730996E-03 -0.161423748665181989E-03 0.000000000000000000E+00 -0.435032918551319986E-03 -0.435106687300586010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996542492566969E-01 -0.161423313372716993E-03 -0.161424179003194989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069400469630010E-03 -0.435069383841369989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.254980067883639992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161506286329310991E-03 -0.161341206046600991E-03 -0.166666666666667011E-01 -0.431364756977960023E-03 -0.438866309539295992E-03 0.000000000000000000E+00 0.319866973594409992E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423386153596000E-03 -0.161424106222316009E-03 -0.435249325540191991E-03 -0.434893412677740991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256606788604577009E-01 0.000000000000000000E+00 -0.434963057292216018E-03 -0.435139014560906973E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138291831602560002E-03 -0.140224363312778995E-03 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419833493286519976E-01 0.000000000000000000E+00 -0.161423303639283993E-03 -0.161424188736628991E-03 -0.666666666666666970E-02 0.000000000000000000E+00 
-0.435072704181455982E-03 -0.435066152962260003E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253728412360505994E-01 0.000000000000000000E+00 -0.434979171689485976E-03 -0.435122209070102999E-03 -0.666666666666666970E-02 -0.140221824975804004E-03 -0.138294369939534993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319851023368492998E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423349940155008E-03 -0.161424142435757001E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435344396466284009E-03 -0.434800441646335982E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.251600082854456994E-01 -0.161430149015849008E-03 -0.161417343360063001E-03 -0.666666666666666970E-02 -0.432078300695953020E-03 -0.438133229979561004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.312157350718397994E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423748575108993E-03 -0.161423743800802989E-03 -0.161423313366150007E-03 -0.161424179009763005E-03 -0.161424179009759996E-03 -0.161423313366152013E-03 -0.166666666666667011E-01 -0.435069386750616997E-03 -0.435069397494826013E-03 -0.435069386095123000E-03 -0.435069398165091018E-03 -0.138284703375701995E-03 -0.140231491539637002E-03 0.000000000000000000E+00 0.420931147064136021E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014771024603002E-03 -0.435085089957181989E-03 -0.666666666666666970E-02 -0.138286970731685001E-03 -0.140229224183653997E-03 
-0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.254910621183637998E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423348017174001E-03 -0.161424144358738008E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435079665188463989E-03 -0.435059345445455996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.249645250281322999E-01 0.000000000000000000E+00 -0.161423121269891999E-03 -0.161424371106020010E-03 -0.666666666666666970E-02 -0.139257838659735000E-03 -0.139258356255603997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319784847308889966E-01 -0.161423139772803008E-03 -0.161424352603109001E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435059648379540009E-03 -0.435079356600350973E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.323098477771483969E-01 0.000000000000000000E+00 -0.434855411650391993E-03 -0.435251202539120021E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138421129190984010E-03 -0.140095065724355991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.286728645834729984E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423315838776007E-03 -0.161424176537136002E-03 -0.161423752187323010E-03 -0.161423740188588999E-03 -0.161423317510291011E-03 -0.161424174865620998E-03 -0.666666666666666970E-02 -0.438771655527875001E-03 -0.431448953116295001E-03 -0.138271241563716002E-03 -0.140244953351622995E-03 -0.435165323893648012E-03 -0.434975574060252989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 
-0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.419833961506188985E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423304924141007E-03 -0.161424187451771002E-03 -0.666666666666666970E-02 -0.435069598057073975E-03 -0.435069190609702994E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256582546102008015E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434978063949837974E-03 -0.435123367209186982E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286924064156001E-03 -0.140229270851182996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256881329485436002E-01 0.000000000000000000E+00 -0.434979402202776018E-03 -0.435121969745410990E-03 -0.666666666666666970E-02 -0.140226692592859009E-03 -0.138289502322480991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257149793095595991E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.434964716442319023E-03 -0.435137270613733019E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138403971130206005E-03 -0.140112223785132992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320995089735353969E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423309436031000E-03 -0.161424182939881009E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435071687413104014E-03 -0.435067147303640016E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.257775991854455010E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 
-0.434123548609677019E-03 -0.436014071331951010E-03 0.000000000000000000E+00 -0.138328115136844999E-03 -0.140188079778493999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320970398516952013E-01 -0.161423235617312989E-03 -0.161424256758598993E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435167788232856023E-03 -0.434973170725627982E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255649837703663986E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423599069647001E-03 -0.161423893306266011E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.435941020818056012E-03 -0.434216891005145009E-03 0.321778215370647006E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.133203828204913997E-03 -0.554477594184077968E-03 -0.310288607973751023E-03 -0.560774180157768039E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.258307931039124007E-01 0.000000000000000000E+00 -0.200815667582686987E-03 -0.626132462157779008E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.446235732743376025E-03 -0.410993106848831988E-03 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421918986255057984E-01 0.000000000000000000E+00 -0.435044026349501998E-03 -0.435054581088656020E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138420116213701009E-03 -0.140096078701637989E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.251983478038565016E-01 0.000000000000000000E+00 -0.874760871910627032E-05 -0.678933813669884995E-03 -0.666666666666666970E-02 
-0.139011566068860000E-03 -0.139504628846480000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319960854906845005E-01 0.000000000000000000E+00 -0.161423534312251998E-03 -0.161423958063660011E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.422822744160627974E-03 -0.447571236367950997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.251635090375166015E-01 -0.161499426349241996E-03 -0.161348066026670989E-03 -0.666666666666666970E-02 -0.431483288490702005E-03 -0.438741647350535027E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.312158150513081011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161424178244698994E-03 -0.161423314131212988E-03 -0.161423314059391994E-03 -0.161424178316519988E-03 -0.161423748120379993E-03 -0.161423744255532992E-03 -0.166666666666667011E-01 -0.435052937198406996E-03 -0.435086217674513994E-03 -0.138285143315771992E-03 -0.140231051599568009E-03 -0.435084983208553009E-03 -0.435054143960659026E-03 0.000000000000000000E+00 0.420929687916430026E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435023352498181996E-03 -0.435076141242082002E-03 -0.666666666666666970E-02 -0.138286660717270009E-03 -0.140229534198068988E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256871016294627011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049188710877018E-03 -0.435049198898220012E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258077330179004E-03 
-0.139258117585160996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252598187185143000E-01 0.000000000000000000E+00 -0.161423313380516987E-03 -0.161424178995394995E-03 -0.666666666666666970E-02 -0.435070189961645989E-03 -0.435068611747460024E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320929801235530984E-01 -0.435014945837523977E-03 -0.435084907681736004E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286694923904008E-03 -0.140229499991435992E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320996579074415977E-01 0.000000000000000000E+00 -0.161423313471188002E-03 -0.161424178904724007E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069377867833978E-03 -0.435069406577771021E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.295485029270328992E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435016111300222006E-03 -0.435083692344642020E-03 -0.435055716669636018E-03 -0.435042938584860996E-03 -0.435014924590734005E-03 -0.435084929837698995E-03 -0.666666666666666970E-02 -0.138321048357603003E-03 -0.140195146557735994E-03 -0.138900264538850991E-03 -0.139615930376488006E-03 -0.138286695606074989E-03 -0.140229499309265011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.420929804437093005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014924430680990E-03 -0.435084930004601023E-03 -0.666666666666666970E-02 -0.138286695607513997E-03 -0.140229499307826003E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 
-0.666666666666666970E-02 0.000000000000000000E+00 0.257458287840863984E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049188263473999E-03 -0.435049199364782998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258097267473007E-03 -0.139258097647866993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255775463585166986E-01 0.000000000000000000E+00 -0.161423313378626003E-03 -0.161424178997286006E-03 -0.666666666666666970E-02 -0.435069399615192016E-03 -0.435069384676977993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257404832051559007E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435042938584863977E-03 -0.435055716669632006E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138910376055990990E-03 -0.139605818859349010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997361635547013E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423315571449996E-03 -0.161424176804462013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384753956998E-03 -0.435069399536463976E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256633780155203009E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313378038989E-03 -0.161424178997872993E-03 0.000000000000000000E+00 -0.435069385238039011E-03 -0.435069399041488984E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322089283932210022E-01 -0.435014924466399001E-03 -0.435084929967365997E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 
-0.138286695450416002E-03 -0.140229499464922995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614590292296996E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435016112049732974E-03 -0.435083691563056016E-03 -0.166666666666667011E-01 -0.138320998722292005E-03 -0.140195196193046992E-03 0.000000000000000000E+00 0.319837098823798002E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.435069384798286989E-03 -0.435069399491148988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256582249674734003E-01 0.000000000000000000E+00 -0.434980660273001976E-03 -0.435120659876175994E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138286858201095011E-03 -0.140229336714244013E-03 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419834073410084996E-01 0.000000000000000000E+00 -0.161423305232396002E-03 -0.161424187143516007E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069384645917010E-03 -0.435069399647007983E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253691078100609993E-01 0.000000000000000000E+00 -0.434980641243667987E-03 -0.435120678167986010E-03 -0.666666666666666970E-02 -0.140229336714366013E-03 -0.138286858200973987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837098823803970E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384793870004E-03 -0.435069399495665991E-03 -0.166666666666667011E-01 
0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.253136149492908004E-01 -0.435016112049908018E-03 -0.435083691562873978E-03 -0.666666666666666970E-02 -0.138320998722292005E-03 -0.140195196193046992E-03 -0.166666666666667011E-01 0.312157364805190005E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423748574844990E-03 -0.161423743801066992E-03 -0.161423313378673003E-03 -0.161424178997239006E-03 -0.161424178997232013E-03 -0.161423313378679996E-03 -0.166666666666667011E-01 -0.435069383970378013E-03 -0.435069400337715023E-03 -0.435069376957132018E-03 -0.435069407508996008E-03 -0.138284694213328010E-03 -0.140231500702010987E-03 0.000000000000000000E+00 0.420929739826170005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014932601145024E-03 -0.435084921485382001E-03 -0.666666666666666970E-02 -0.138286682366655988E-03 -0.140229512548683009E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.254915570191668016E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423360046659997E-03 -0.161424132329252012E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435068904052976024E-03 -0.435069891069177000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.249645635933331000E-01 -0.161423119060364001E-03 -0.161424373315548008E-03 -0.666666666666666970E-02 -0.139258110049798010E-03 
-0.139258084865540987E-03 -0.166666666666667011E-01 0.319795589129939009E-01 -0.161423181597269993E-03 -0.161424310778641989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435063340741964014E-03 -0.435075580375908022E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.322057823008502012E-01 0.000000000000000000E+00 -0.435021995940365993E-03 -0.435077556208205004E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138280141008673011E-03 -0.140236053906665986E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.286987467722891014E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423485461700003E-03 -0.161424006914212006E-03 -0.161423743701962999E-03 -0.161423748673950013E-03 -0.161423485478030988E-03 -0.161424006897880994E-03 -0.666666666666666970E-02 -0.434818199915888975E-03 -0.435326186273355992E-03 -0.138107939683270998E-03 -0.140408255232067999E-03 -0.434750487938305018E-03 -0.435395405799094998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.419834078568204969E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305246576987E-03 -0.161424187129334995E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069368742756982E-03 -0.435069415908642004E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256580957796514984E-01 0.000000000000000000E+00 -0.434980863637928004E-03 -0.435120447850464987E-03 0.000000000000000000E+00 -0.666666666666666970E-02 
-0.138286593339167997E-03 -0.140229601576171000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.256868273049145994E-01 0.000000000000000000E+00 -0.434980612246944019E-03 -0.435120708410805001E-03 -0.666666666666666970E-02 -0.140229353764139986E-03 -0.138286841151199011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256611955309135990E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435106719724830994E-03 -0.434994029732972990E-03 0.000000000000000000E+00 -0.138320118014007004E-03 -0.140196076901331993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320999233193186018E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423320520808004E-03 -0.161424171855104005E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435059778147594005E-03 -0.435079222540635984E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257716863610135995E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435022363973044002E-03 -0.435077172538276982E-03 0.000000000000000000E+00 -0.138312498857191992E-03 -0.140203696058148009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996765524508010E-01 0.000000000000000000E+00 -0.161423313971483987E-03 -0.161424178404427995E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435068773858323006E-03 -0.435070024197505999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255013626644550011E-01 
-0.666666666666666970E-02 -0.161778513994174000E-03 -0.161068978381738009E-03 -0.166666666666667011E-01 -0.430721544928321987E-03 -0.439524018875910982E-03 0.319967451293014030E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423540387061002E-03 -0.161423951988851007E-03 -0.436779134539303025E-03 -0.433396918749490016E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257463974004814994E-01 0.000000000000000000E+00 -0.357123790381600977E-03 -0.513334443915617971E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138849213053182001E-03 -0.139666981862157999E-03 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421600707339402012E-01 0.000000000000000000E+00 -0.438162222449141976E-03 -0.432071491857211011E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.110310967390135005E-03 -0.562620286998803958E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.250247683829411989E-01 0.000000000000000000E+00 0.312489112005970006E-04 -0.718930333589588031E-03 -0.666666666666666970E-02 -0.143948366628396992E-03 -0.134567828286942005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.321784029807069966E-01 -0.131393333165138998E-03 -0.556288089223852967E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.344952960577046006E-03 -0.526365968885401047E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.252170877020880015E-01 -0.161423598610962993E-03 -0.161423893764948989E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435940683620558982E-03 -0.434217221098728986E-03 
-0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.312157364068764015E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423748574870008E-03 -0.161423743801043004E-03 -0.161423313378020991E-03 -0.161424178997891994E-03 -0.161424178997885001E-03 -0.161423313378028011E-03 -0.166666666666667011E-01 -0.435069384799240003E-03 -0.435069399490174995E-03 -0.435069377535609984E-03 -0.435069406917481974E-03 -0.138284694694889009E-03 -0.140231500220449988E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.420929804454727996E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014924420943987E-03 -0.435084930014753980E-03 -0.666666666666666970E-02 -0.138286695611132006E-03 -0.140229499304206991E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256871018618805014E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049188236810973E-03 -0.435049199392586983E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258097270825008E-03 -0.139258097644513989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252598186487732017E-01 0.000000000000000000E+00 -0.161423313378620013E-03 -0.161424178997291996E-03 -0.666666666666666970E-02 -0.435069399482861021E-03 -0.435069384806393027E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320929801562968992E-01 -0.435014924399239995E-03 -0.435084930037426003E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286695018654008E-03 -0.140229499896684989E-03 
-0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320996579148839015E-01 0.000000000000000000E+00 -0.161423313471389989E-03 -0.161424178904521993E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384785220998E-03 -0.435069399504509015E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.295485029252238011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435016114058515979E-03 -0.435083689468307975E-03 -0.435055716894270023E-03 -0.435042938369449973E-03 -0.435014924446868020E-03 -0.435084929987721025E-03 -0.666666666666666970E-02 -0.138321048328908996E-03 -0.140195146586430001E-03 -0.138900264540856006E-03 -0.139615930374483994E-03 -0.138286695607003012E-03 -0.140229499308336012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.420929804454603998E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014924420999986E-03 -0.435084930014695975E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286695611106988E-03 -0.140229499304232009E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.257458287849481986E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049188236881013E-03 -0.435049199392514017E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258097270799990E-03 -0.139258097644539007E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255775463582476013E-01 0.000000000000000000E+00 -0.161423313378621992E-03 -0.161424178997289990E-03 
-0.666666666666666970E-02 -0.435069399483869980E-03 -0.435069384805406023E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257404831771072991E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435042938369448998E-03 -0.435055716894270999E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138910376031616011E-03 -0.139605818883723013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997361628699990E-01 0.000000000000000000E+00 -0.161423315571431998E-03 -0.161424176804481013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384829067001E-03 -0.435069399459660992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256633780151233996E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313378029990E-03 -0.161424178997881992E-03 0.000000000000000000E+00 -0.435069376411787974E-03 -0.435069408066628019E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322089281582298992E-01 -0.435014924431676993E-03 -0.435084930003603991E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286694967978995E-03 -0.140229499947360002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614590292298002E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435016112049635016E-03 -0.435083691563157009E-03 -0.166666666666667011E-01 -0.138320998722293008E-03 -0.140195196193045990E-03 0.000000000000000000E+00 0.319837098823795019E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 
-0.666666666666666970E-02 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.435069384800495997E-03 -0.435069399488889998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256582249675054996E-01 0.000000000000000000E+00 -0.434980660275541991E-03 -0.435120659873527993E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138286858201179010E-03 -0.140229336714159988E-03 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419834073409666025E-01 0.000000000000000000E+00 -0.161423305232394999E-03 -0.161424187143517010E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069384656581982E-03 -0.435069399636101993E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253691078101523984E-01 0.000000000000000000E+00 -0.434980641228473002E-03 -0.435120678183829998E-03 -0.666666666666666970E-02 -0.140229336714159988E-03 -0.138286858201179010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837098823795019E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423313471231993E-03 -0.161424178904679989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384800471982E-03 -0.435069399488914989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.253136149492911994E-01 -0.435016112049635992E-03 -0.435083691563157009E-03 -0.666666666666666970E-02 -0.138320998722293008E-03 -0.140195196193045990E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320885629537309008E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435084884244977990E-03 -0.435014968312467975E-03 -0.435014924420903005E-03 -0.435084930014797023E-03 -0.435055716900959984E-03 -0.435042938363035996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138286695608023002E-03 -0.140229499307315995E-03 -0.138900264540568990E-03 -0.139615930374770007E-03 -0.138287928689845009E-03 -0.140228266225493988E-03 0.000000000000000000E+00 0.419834073409644029E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305232394999E-03 -0.161424187143517010E-03 -0.666666666666666970E-02 -0.435069384693222974E-03 -0.435069399598635002E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255994980449185013E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980660275051986E-03 -0.435120659874038977E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286858201180012E-03 -0.140229336714159012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.253691078105353005E-01 0.000000000000000000E+00 -0.434980641228200975E-03 -0.435120678184114005E-03 -0.666666666666666970E-02 -0.140229336713374998E-03 -0.138286858201963999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837881362097023E-01 -0.161423315571430996E-03 -0.161424176804481013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384829208001E-03 -0.435069399459517986E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320996579140455027E-01 0.000000000000000000E+00 -0.161423313471367004E-03 -0.161424178904545006E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 
-0.435069384800495021E-03 -0.435069399488890974E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.295485029266231991E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435016113771493024E-03 -0.435083689767614020E-03 -0.435055716901091010E-03 -0.435042938362910012E-03 -0.435014924420824020E-03 -0.435084930014878989E-03 -0.666666666666666970E-02 -0.138321048331202002E-03 -0.140195146584136995E-03 -0.138900264540522993E-03 -0.139615930374817007E-03 -0.138286695607886989E-03 -0.140229499307452008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.420929804454732021E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014924420943012E-03 -0.435084930014755010E-03 -0.666666666666666970E-02 -0.138286695611133009E-03 -0.140229499304205988E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.257458287849504017E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435049188236650999E-03 -0.435049199392754005E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.139258097270826011E-03 -0.139258097644513013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.255775463581538985E-01 0.000000000000000000E+00 -0.161423313378620013E-03 -0.161424178997291996E-03 -0.666666666666666970E-02 -0.435069399482816027E-03 -0.435069384806436991E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.257404831864265007E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435042938362910012E-03 -0.435055716901091010E-03 
0.000000000000000000E+00 -0.138910376039622004E-03 -0.139605818875716993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997361628520966E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423315571430996E-03 -0.161424176804481013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384829304983E-03 -0.435069399459418022E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.256633780150350016E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313378028011E-03 -0.161424178997883998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.435069377535611014E-03 -0.435069406917480998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322089281809833025E-01 -0.435014924399087014E-03 -0.435084930037585977E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286695014638991E-03 -0.140229499900700006E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255642320220383010E-01 -0.666666666666666970E-02 -0.161423743718748996E-03 -0.161423748657163013E-03 -0.166666666666667011E-01 -0.435032852654396000E-03 -0.435106754681328976E-03 0.319837065485489985E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161423313381479000E-03 -0.161424178994433009E-03 -0.435069384799287003E-03 -0.435069399490127019E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.256582249674894985E-01 0.000000000000000000E+00 -0.434980660275194016E-03 -0.435120659873890984E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138286858201148001E-03 -0.140229336714191999E-03 
0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419834073409640976E-01 0.000000000000000000E+00 -0.161423305232394999E-03 -0.161424187143517010E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435069384687570974E-03 -0.435069399604414992E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253691078101366992E-01 0.000000000000000000E+00 -0.434980641228032002E-03 -0.435120678184291001E-03 -0.666666666666666970E-02 -0.140229336714191999E-03 -0.138286858201148001E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837065485489985E-01 0.000000000000000000E+00 -0.161423313381479000E-03 -0.161424178994433009E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384799287003E-03 -0.435069399490127019E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.252163879420997002E-01 -0.161423743718748996E-03 -0.161423748657163013E-03 -0.666666666666666970E-02 -0.435032852654396000E-03 -0.435106754681328976E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320887366348247965E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435111019942127013E-03 -0.434989904848891011E-03 -0.435014811994715992E-03 -0.435085047244578027E-03 -0.435084986246467005E-03 -0.435014870489667008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138286808290661993E-03 -0.140229386624677005E-03 
-0.138288040456547998E-03 -0.140228154458792002E-03 -0.138900328862363002E-03 -0.139615866052975995E-03 0.000000000000000000E+00 0.419834068919483008E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305219605994E-03 -0.161424187156306991E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435073590544243023E-03 -0.435065286134219016E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255994981604824000E-01 0.000000000000000000E+00 -0.434980605991956010E-03 -0.435120716478292976E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286858621097992E-03 -0.140229336294241005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253691080400011010E-01 0.000000000000000000E+00 -0.434980551188209996E-03 -0.435120772070955015E-03 -0.666666666666666970E-02 -0.140229336059168004E-03 -0.138286858856170994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837098814821016E-01 -0.161423313471205999E-03 -0.161424178904706010E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069401084582977E-03 -0.435069383239967001E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996545746903006E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423313381464986E-03 -0.161424178994447998E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069392005489001E-03 -0.435069392121542001E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.292425277050150992E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423743718737991E-03 -0.161423748657174993E-03 -0.161424178997885001E-03 -0.161423313378027008E-03 
-0.161423313378019988E-03 -0.161424178997891994E-03 -0.666666666666666970E-02 -0.435032852646152974E-03 -0.435106754689757998E-03 -0.435069377535869000E-03 -0.435069406917216995E-03 0.000000000000000000E+00 -0.435069384798973994E-03 -0.435069399490447021E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.420929804212083966E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014924419121986E-03 -0.435084930016657026E-03 -0.666666666666666970E-02 -0.138286695561419006E-03 -0.140229499353920994E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255502626954251001E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423359539924989E-03 -0.161424132835986993E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435069382989675027E-03 -0.435069401340516977E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252823579642372995E-01 0.000000000000000000E+00 -0.161423118848985994E-03 -0.161424373526926991E-03 -0.666666666666666970E-02 -0.139258097691293003E-03 -0.139258097224046997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255489358069172015E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313378028011E-03 -0.161424178997883998E-03 0.000000000000000000E+00 -0.435069377535374983E-03 -0.435069406917722016E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322089281809831013E-01 0.000000000000000000E+00 -0.435014924399098994E-03 -0.435084930037573996E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286695014637988E-03 -0.140229499900701009E-03 -0.166666666666667011E-01 
0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.258549253945415010E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435042938363035996E-03 -0.435055716900959008E-03 0.000000000000000000E+00 -0.138910376039619998E-03 -0.139605818875718999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997361628526032E-01 -0.161423315571430996E-03 -0.161424176804481013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384829208001E-03 -0.435069399459517010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255489358069375012E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313378028011E-03 -0.161424178997883998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.435069377571737986E-03 -0.435069406880540007E-03 0.000000000000000000E+00 0.320929801553170024E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435014924399170009E-03 -0.435084930037499024E-03 -0.138286695016646012E-03 -0.140229499898693013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255502626954253013E-01 0.000000000000000000E+00 -0.161423359539924989E-03 -0.161424132835986993E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.435069382989559993E-03 -0.435069401340634017E-03 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.420929804212068007E-01 0.000000000000000000E+00 -0.435014924417415994E-03 -0.435084930018437015E-03 -0.666666666666666970E-02 0.000000000000000000E+00 
-0.138286695561417000E-03 -0.140229499353921997E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.249646302548570016E-01 0.000000000000000000E+00 -0.161423118848985994E-03 -0.161424373526926991E-03 -0.666666666666666970E-02 -0.139258097691296012E-03 -0.139258097224043988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837881362063023E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423315571430996E-03 -0.161424176804481013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384829284004E-03 -0.435069399459439977E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.253926391078276997E-01 -0.435042938363074973E-03 -0.435055716900919001E-03 -0.666666666666666970E-02 -0.138910376047635993E-03 -0.139605818867703004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.323033163368178014E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435248914087700014E-03 -0.434857618270111026E-03 -0.434883815525587006E-03 -0.435221605060230013E-03 -0.435144012464388997E-03 -0.434958245913406004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138392691801862987E-03 -0.140123503113476010E-03 -0.138936896109538013E-03 -0.139579298805801011E-03 -0.138396653972959005E-03 -0.140119540942379992E-03 0.000000000000000000E+00 0.419134531554943013E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.160003730031608006E-03 -0.162843762344304003E-03 -0.666666666666666970E-02 
0.000000000000000000E+00 -0.439266035089253990E-03 -0.430980363868474004E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256392674343047001E-01 0.000000000000000000E+00 -0.434777562089547990E-03 -0.435332391680561979E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138409864981691995E-03 -0.140106329933648005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.254101279172756003E-01 0.000000000000000000E+00 -0.373644623812598002E-03 -0.492026351508191052E-03 -0.666666666666666970E-02 -0.908412435916435045E-03 0.235481181527497004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.321268372004408984E-01 -0.200001267691483995E-03 -0.627372315610022989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.127171097760206999E-03 -0.545760156628731963E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320995602249590020E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423310638646997E-03 -0.161424181737265012E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.437288210106853009E-03 -0.432899574546833006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296377168966710999E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434989904918347983E-03 -0.435111019849799988E-03 -0.435084787576991007E-03 -0.435015060995395985E-03 -0.435015087313978986E-03 -0.435084760132168008E-03 -0.666666666666666970E-02 -0.138910479330903999E-03 -0.139605715584434998E-03 -0.138288220902347007E-03 -0.140227974012992993E-03 0.000000000000000000E+00 -0.138286985894706013E-03 -0.140229209020633011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 
0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.419834073423189028E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305232431997E-03 -0.161424187143480012E-03 -0.666666666666666970E-02 -0.435069384782949973E-03 -0.435069399506886020E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256582245002681016E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980660161395994E-03 -0.435120659992680999E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286857243334990E-03 -0.140229337672004008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256868356243260994E-01 0.000000000000000000E+00 -0.434980641780946975E-03 -0.435120677607716026E-03 -0.666666666666666970E-02 -0.140229336498973991E-03 -0.138286858416365006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255642316967265998E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423743725422992E-03 -0.161423748650489993E-03 0.000000000000000000E+00 -0.435032788457063980E-03 -0.435106820324987996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996548633918979E-01 0.000000000000000000E+00 -0.161423313389182012E-03 -0.161424178986729997E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069349793661977E-03 -0.435069435284559975E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.256786747351973009E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423743711796008E-03 -0.161423748664116001E-03 0.000000000000000000E+00 -0.435032875429173019E-03 -0.435106731393101022E-03 
-0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996542943430968E-01 -0.161423313373898990E-03 -0.161424179002013995E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069393047989017E-03 -0.435069391099458976E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256124534411805993E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.422563580131968979E-03 -0.447998624216189984E-03 -0.166666666666667011E-01 -0.138286150728846990E-03 -0.140230044186493010E-03 0.000000000000000000E+00 0.319552376669784977E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161419817025352993E-03 -0.161427675350558989E-03 -0.434132277684113981E-03 -0.436028351477167012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.256464487089681989E-01 0.000000000000000000E+00 -0.434899896387440008E-03 -0.435204881412590012E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138261308742774005E-03 -0.140254886172565995E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419830852156009016E-01 0.000000000000000000E+00 -0.161423296248432011E-03 -0.161424196127479998E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435080309509953001E-03 -0.435058715368486990E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253594622357068006E-01 0.000000000000000000E+00 -0.434950331803641015E-03 -0.435152283781920987E-03 -0.666666666666666970E-02 -0.140249996876020999E-03 -0.138266198039317998E-03 
-0.166666666666667011E-01 0.000000000000000000E+00 0.319571786575686967E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161420475113085006E-03 -0.161427017262827003E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.434308830163140978E-03 -0.435847642627907976E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252660555352814988E-01 -0.423620518083848974E-03 -0.446908164705409994E-03 -0.666666666666666970E-02 -0.138290733115793013E-03 -0.140225461799546011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320833316936339971E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.434976564103407978E-03 -0.435124935063587002E-03 -0.435182102606195996E-03 -0.434921744344358996E-03 -0.434970473518194996E-03 -0.435131286534540024E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138283560982178005E-03 -0.140232633933160992E-03 -0.138290646200932993E-03 -0.140225548714406005E-03 -0.138898978959791009E-03 -0.139617215955547988E-03 0.000000000000000000E+00 0.421628681367203009E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.397192681545031025E-04 -0.727400690543495007E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435235074593796000E-03 -0.434912988301708998E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256973882949261996E-01 0.000000000000000000E+00 -0.435443110746612982E-03 -0.434671360314161997E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138672805730211991E-03 
-0.139843389185127006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253811408048402014E-01 0.000000000000000000E+00 -0.111660192932873994E-03 -0.576021229456117972E-03 -0.666666666666666970E-02 -0.807402803567670987E-03 0.113198743547475004E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.318951479546440986E-01 -0.334801614937765011E-03 -0.534674356904564995E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.135236534786964987E-03 -0.143279660128374010E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320991543630683995E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423299320898012E-03 -0.161424193055014999E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.439089518603328992E-03 -0.431138195492317014E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.292550790147875009E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423735549572994E-03 -0.161423756826338988E-03 -0.161424083758227992E-03 -0.161423408617684993E-03 -0.161423408591030993E-03 -0.161424083784880988E-03 -0.666666666666666970E-02 -0.434939106056469018E-03 -0.435202598866759986E-03 -0.434989974486759020E-03 -0.435150588304750977E-03 0.000000000000000000E+00 -0.434898044279196977E-03 -0.435244580341264027E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929736779582994E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 
-0.435014942481456979E-03 -0.435084911182310997E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286681738492993E-03 -0.140229513176846004E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255502900122648001E-01 0.000000000000000000E+00 -0.161423360190818011E-03 -0.161424132185093998E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435068884552595005E-03 -0.435069911008886009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.252824734873801985E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423118220565989E-03 -0.161424374155345993E-03 -0.666666666666666970E-02 -0.139258110569624000E-03 -0.139258084345714997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255485447377057015E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423302756024003E-03 -0.161424189619888006E-03 0.000000000000000000E+00 -0.434872688531902005E-03 -0.435270525342468988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322052497845985025E-01 0.000000000000000000E+00 -0.435023979094175024E-03 -0.435075488205578982E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138279010834745992E-03 -0.140237184080593006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.258530259445350005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.434958189050955980E-03 -0.435144092591555997E-03 0.000000000000000000E+00 -0.138909064929744003E-03 -0.139607129985594994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320994819723071992E-01 
-0.161423308725767013E-03 -0.161424183650144996E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435080068461255018E-03 -0.435058950988906974E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614731138163008E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.429527300766952978E-03 -0.440793399874087001E-03 -0.166666666666667011E-01 -0.138412525549365991E-03 -0.140103669365973006E-03 0.000000000000000000E+00 0.319693244854589026E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161422576318977012E-03 -0.161424916056934997E-03 -0.437741651795140981E-03 -0.432457178334903997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.257682962742953013E-01 0.000000000000000000E+00 -0.372135275695885989E-03 -0.498733752802193965E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138908713201794991E-03 -0.139607481713544007E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.418896828694114015E-01 0.000000000000000000E+00 -0.436533664255522999E-03 -0.433624016339187999E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.134626819469867013E-03 -0.143889375445472011E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.252833740154079986E-01 0.000000000000000000E+00 0.378159457419296988E-04 -0.725497368130921007E-03 -0.666666666666666970E-02 -0.597713125202046038E-03 -0.752181291868920024E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.321707087590768978E-01 0.000000000000000000E+00 -0.981956570690172963E-04 -0.589485765319974005E-03 -0.666666666666666970E-02 
-0.666666666666666970E-02 -0.242422521139259002E-03 -0.626419769058458972E-03 -0.166666666666667011E-01 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252392538739079006E-01 -0.333702074783927001E-03 -0.536013663819403965E-03 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 -0.138219319560300990E-03 -0.140296875355038007E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.323033117395759989E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435248913863458978E-03 -0.434857618485890026E-03 -0.434883818075460025E-03 -0.435221602402711982E-03 -0.435144010721807983E-03 -0.434958247585231009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138392690175634999E-03 -0.140123504739703998E-03 -0.138936895546960007E-03 -0.139579299368378990E-03 -0.138396652402613996E-03 -0.140119542512725002E-03 0.000000000000000000E+00 0.419134536981460026E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.160003888239780996E-03 -0.162843604136131013E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.439265966752186974E-03 -0.430980430216653992E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.256385430489332010E-01 0.000000000000000000E+00 -0.434778992070864020E-03 -0.435330901615854019E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138408856053690005E-03 -0.140107338861648992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.254101277388419991E-01 
0.000000000000000000E+00 -0.373643453136775986E-03 -0.492027450061672958E-03 -0.666666666666666970E-02 -0.908412791146940971E-03 0.235481536758003012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.321268373345425032E-01 -0.200001595110305000E-03 -0.627372161158813030E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.127171079349847012E-03 -0.545760175039092005E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.321000029443719026E-01 0.000000000000000000E+00 -0.161423322269462990E-03 -0.161424170106448992E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.437246082492378997E-03 -0.432940746439987984E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.296367144183955003E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.435124963867026995E-03 -0.434976534547966984E-03 -0.435077880745983003E-03 -0.435021684411966020E-03 -0.435028125198068990E-03 -0.435071164272797983E-03 -0.666666666666666970E-02 -0.138910250043690000E-03 -0.139605944871648997E-03 -0.138289604026781002E-03 -0.140226590888557995E-03 -0.138286261066744988E-03 -0.140229933848595012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.419834145363481015E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305428732005E-03 -0.161424186947180004E-03 -0.666666666666666970E-02 -0.435068797988044014E-03 -0.435069999528659985E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256581969780162004E-01 0.000000000000000000E+00 
0.000000000000000000E+00 -0.434982641293220015E-03 -0.435118594151826009E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286797863401998E-03 -0.140229397051936999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256861833871666993E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980942275246015E-03 -0.435120364500525027E-03 -0.666666666666666970E-02 -0.140230675945465999E-03 -0.138285518969872998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255648152871906995E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423744276026012E-03 -0.161423748099886999E-03 0.000000000000000000E+00 -0.434971116348022017E-03 -0.435169879709118980E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320996762669861976E-01 0.000000000000000000E+00 -0.161423313958423010E-03 -0.161424178417488999E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435067366048129010E-03 -0.435071463729721000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.256787289926059988E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423752315998997E-03 -0.161423740059913012E-03 0.000000000000000000E+00 -0.434863867040502017E-03 -0.435279543030525011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.321000539630773019E-01 -0.161423323759014989E-03 -0.161424168616896993E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435021966445662005E-03 -0.435117885588833984E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256124534378156000E-01 0.000000000000000000E+00 
0.000000000000000000E+00 -0.666666666666666970E-02 -0.422563583786935012E-03 -0.447998620447711019E-03 -0.166666666666667011E-01 -0.138286150716901006E-03 -0.140230044198437991E-03 0.000000000000000000E+00 0.319552376748891975E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.161419817028325008E-03 -0.161427675347588004E-03 -0.434132178542880997E-03 -0.436028452917342011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.256464472998883010E-01 0.000000000000000000E+00 -0.434900113529982019E-03 -0.435204654991785977E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138261303887879009E-03 -0.140254891027459988E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.419830890665787021E-01 0.000000000000000000E+00 -0.161423296358576002E-03 -0.161424196017336007E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435055172719591018E-03 -0.435083932013876024E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.253594585320825010E-01 0.000000000000000000E+00 -0.434951272976298023E-03 -0.435151302408671016E-03 -0.666666666666666970E-02 -0.140250006846153006E-03 -0.138266188069185992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319571786801332997E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161420475119938004E-03 -0.161427017255975008E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.434308550272304017E-03 -0.435847929000599975E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252660555256693994E-01 
-0.423620528167419021E-03 -0.446908154298355977E-03 -0.666666666666666970E-02 -0.138290733082334995E-03 -0.140225461833005005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.320861713072602972E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435354479890220003E-03 -0.434756412513020009E-03 -0.435057879269062974E-03 -0.435040865004169999E-03 -0.434966118570618990E-03 -0.435135827690980980E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.138285724464454012E-03 -0.140230470450885988E-03 -0.138897585402013996E-03 -0.139618609513325002E-03 -0.138201697331153989E-03 -0.140314497584185008E-03 0.000000000000000000E+00 0.421628678547669028E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.397284147640295009E-04 -0.727409837153020992E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435314260356761012E-03 -0.434838234146065004E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.256973904451089016E-01 0.000000000000000000E+00 -0.435443064032224982E-03 -0.434671405121811992E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138672811291356012E-03 -0.139843383623983013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253811442749607014E-01 0.000000000000000000E+00 -0.111659649866176005E-03 -0.576021772522815052E-03 -0.666666666666666970E-02 -0.807399256528189952E-03 0.113123198788523993E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.318951480124047976E-01 -0.334801256611660003E-03 -0.534674694551013989E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.135236541601312007E-03 -0.143279653314027993E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 
-0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320991543634817009E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423299320899991E-03 -0.161424193055011991E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.439089603173896989E-03 -0.431138112796988022E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.292550790154386987E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423735549587007E-03 -0.161423756826326004E-03 -0.161424083758224008E-03 -0.161423408617689004E-03 -0.161423408591035005E-03 -0.161424083784877004E-03 -0.666666666666666970E-02 -0.434939104555981024E-03 -0.435202600400863017E-03 -0.434989974551208984E-03 -0.435150588238853982E-03 0.000000000000000000E+00 -0.434898044011626994E-03 -0.435244580614824986E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420929736778404007E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014942481818994E-03 -0.435084911181932989E-03 -0.666666666666666970E-02 -0.138286681738250999E-03 -0.140229513177087998E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255502900122961014E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423360190818987E-03 -0.161424132185092995E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435068884543961991E-03 -0.435069911017712987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252824734843049986E-01 
0.000000000000000000E+00 0.000000000000000000E+00 -0.161423118220580002E-03 -0.161424374155332007E-03 -0.666666666666666970E-02 -0.139258110569847997E-03 -0.139258084345492004E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255485447455677007E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423302756240003E-03 -0.161424189619672006E-03 0.000000000000000000E+00 -0.434872688199729998E-03 -0.435270525682074014E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322052497810565996E-01 0.000000000000000000E+00 -0.435023979104199015E-03 -0.435075488195124996E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138279010827208998E-03 -0.140237184088130000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.258530259407728988E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.434958190724600005E-03 -0.435144090846366989E-03 0.000000000000000000E+00 -0.138909064921109010E-03 -0.139607129994229987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320994819774753012E-01 -0.161423308725907011E-03 -0.161424183650004998E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435080068181975977E-03 -0.435058951262028994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.256614731138162001E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.429527300766965989E-03 -0.440793399874073991E-03 -0.166666666666667011E-01 -0.138412525549365991E-03 -0.140103669365973006E-03 0.000000000000000000E+00 0.319693244854589997E-01 0.000000000000000000E+00 0.000000000000000000E+00 
-0.666666666666666970E-02 -0.666666666666666970E-02 -0.161422576318977012E-03 -0.161424916056934997E-03 -0.437741651794766986E-03 -0.432457178335270023E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.257682962742878004E-01 0.000000000000000000E+00 -0.372135275696198998E-03 -0.498733752801886051E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.138908713201775990E-03 -0.139607481713563007E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.418896828693844023E-01 0.000000000000000000E+00 -0.436533664324920991E-03 -0.433624016272548000E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.134626819467095006E-03 -0.143889375448244994E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.252833740154073013E-01 0.000000000000000000E+00 0.378159457400615982E-04 -0.725497368129053035E-03 -0.666666666666666970E-02 -0.597713125208466033E-03 -0.752181291804723058E-04 -0.166666666666667011E-01 0.000000000000000000E+00 0.321707087590768007E-01 0.000000000000000000E+00 -0.981956570691642057E-04 -0.589485765319826987E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.242422521137329990E-03 -0.626419769060301032E-03 -0.166666666666667011E-01 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252392538739078000E-01 -0.333702074783996986E-03 -0.536013663819338046E-03 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 -0.138219319560300990E-03 -0.140296875355038007E-03 -0.166666666666667011E-01 0.320887149633356031E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435084983875554023E-03 -0.435014872763852026E-03 -0.435014828662647025E-03 -0.435085029863970982E-03 -0.435055763684865022E-03 -0.435042893498926986E-03 -0.166666666666667011E-01 -0.138286799284913995E-03 -0.140229395630425002E-03 -0.138900300230182012E-03 -0.139615894685157012E-03 -0.138288031619809013E-03 -0.140228163295530011E-03 0.000000000000000000E+00 0.419834394758250989E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423306124254990E-03 -0.161424186251656992E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435073250189719975E-03 -0.435065618982948010E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.255995701724177002E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434985228537088976E-03 -0.435115896247923015E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286994987186010E-03 -0.140229199928153990E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.253544285340416009E-01 0.000000000000000000E+00 -0.434987985656363021E-03 -0.435113023678724013E-03 -0.666666666666666970E-02 -0.140261879841764011E-03 -0.138254315073575989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319435795329029021E-01 -0.161410251962658001E-03 -0.161437240413254008E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.433888695127137008E-03 -0.436278193993204976E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320989341466072983E-01 0.000000000000000000E+00 -0.161423293334831010E-03 -0.161424199041080999E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435076232841126974E-03 -0.435062702151373017E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 
0.000000000000000000E+00 0.291210504532323985E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423172358814994E-03 -0.161424320017096988E-03 -0.161436491869935994E-03 -0.161411000505975988E-03 -0.161411014301727991E-03 -0.161436478074183991E-03 -0.666666666666666970E-02 -0.437347517893396990E-03 -0.432844697077642020E-03 -0.436241571863408976E-03 -0.433925164846824982E-03 0.000000000000000000E+00 -0.437803693779869989E-03 -0.432399238650052009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 0.000000000000000000E+00 0.420931105150842977E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014654010180979E-03 -0.435085211979509994E-03 -0.666666666666666970E-02 -0.138286962315859997E-03 -0.140229232599479000E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255498409722413992E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423349311523990E-03 -0.161424143064387991E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435079351261626014E-03 -0.435059652438656008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252819204017892991E-01 0.000000000000000000E+00 -0.161423122522292011E-03 -0.161424369853619998E-03 -0.666666666666666970E-02 -0.139257846415800999E-03 -0.139258348499537998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255491014693066017E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423317190226995E-03 -0.161424175185684987E-03 0.000000000000000000E+00 -0.438043455875436027E-03 -0.432161023931338002E-03 
-0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322801706716391024E-01 0.000000000000000000E+00 -0.434874122345711006E-03 -0.435231709745513020E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138392757283109009E-03 -0.140123437632229988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.258886981903099014E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435135825668647020E-03 -0.434966107095303013E-03 0.000000000000000000E+00 -0.138935446888224990E-03 -0.139580748027114007E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.321000528704651969E-01 -0.161423323773416988E-03 -0.161424168602494994E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435030730220607980E-03 -0.435108924522503010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.254844804391417996E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161142124363106994E-03 -0.161705368012804988E-03 -0.166666666666667011E-01 -0.225281625809598993E-03 -0.643927738894806969E-03 0.000000000000000000E+00 0.321599842976618006E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 0.464257778793151032E-04 -0.734107200268307048E-03 -0.388025614613707001E-03 -0.483301313684787997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255020432374560985E-01 0.000000000000000000E+00 -0.337126126111301995E-03 -0.532578756276378025E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.137258701832956002E-03 -0.141257493082382995E-03 
0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.421427039886230967E-01 0.000000000000000000E+00 -0.413943151144718978E-03 -0.457074858319932024E-03 -0.666666666666666970E-02 0.000000000000000000E+00 0.211434099434618995E-03 -0.884365353823558038E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.252964489671319986E-01 0.000000000000000000E+00 -0.213567925484438011E-03 -0.614115947829842971E-03 -0.666666666666666970E-02 -0.140017356100088014E-03 -0.138498838815251987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319739847535212979E-01 0.000000000000000000E+00 -0.161422918075368997E-03 -0.161424574300543012E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.427230082581125004E-03 -0.443079920816016025E-03 -0.166666666666667011E-01 0.333333333333332982E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.253449783929956991E-01 -0.431322912949462978E-03 -0.438929377851577025E-03 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 -0.138850109293811998E-03 -0.139666085621527000E-03 -0.166666666666667011E-01 0.240847367787417992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.419834073413790990E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423305232406004E-03 -0.161424187143506005E-03 -0.666666666666666970E-02 -0.435069384667189979E-03 -0.435069399625256014E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.255994979990794985E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.434980660475906998E-03 -0.435120659664607981E-03 0.000000000000000000E+00 
0.000000000000000000E+00 -0.666666666666666970E-02 -0.138286858107371994E-03 -0.140229336807968006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.253691078096668007E-01 0.000000000000000000E+00 -0.434980641165363003E-03 -0.435120678249643021E-03 -0.666666666666666970E-02 -0.140229336715335995E-03 -0.138286858200003003E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837098824014010E-01 -0.161423313471232996E-03 -0.161424178904679013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069384765853027E-03 -0.435069399524313973E-03 -0.166666666666667011E-01 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.320996543007720028E-01 0.000000000000000000E+00 -0.161423313374075010E-03 -0.161424179001836999E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069381467433980E-03 -0.435069402897058982E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.292425276973896017E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423743711891987E-03 -0.161423748664019995E-03 -0.161424178997872993E-03 -0.161423313378038989E-03 -0.161423313378032999E-03 -0.161424178997880013E-03 -0.666666666666666970E-02 -0.435032871054405002E-03 -0.435106735866860005E-03 -0.435069379977790014E-03 -0.435069404420270985E-03 -0.435069383385271993E-03 -0.435069400936004996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.238543465439772992E-01 -0.666666666666666970E-02 -0.166666666666667011E-01 0.420929804189209972E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.435014924423208018E-03 -0.435084930012396979E-03 -0.666666666666666970E-02 -0.138286695556728991E-03 -0.140229499358610006E-03 -0.166666666666667011E-01 
-0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255502626963007989E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423359539945995E-03 -0.161424132835965987E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435069382819562025E-03 -0.435069401514463013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.252823579206017009E-01 0.000000000000000000E+00 -0.161423118849177003E-03 -0.161424373526735006E-03 -0.666666666666666970E-02 -0.139258097695681013E-03 -0.139258097219658011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.255489355337556005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313370652997E-03 -0.161424179005259012E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.435069374869375021E-03 -0.435069409643798000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.322089281766608018E-01 -0.435014924689618014E-03 -0.435084929734623027E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.138286695005679009E-03 -0.140229499909660992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.258549254171622014E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435042893494948018E-03 -0.435055763690273998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.138910376350831992E-03 -0.139605818564507005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 0.320997360358935974E-01 
-0.161423315568021993E-03 -0.161424176807890992E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435069380937729011E-03 -0.435069403438677978E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.255489358069375012E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.161423313378028011E-03 -0.161424178997883998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.435069377571565001E-03 -0.435069406880717003E-03 0.000000000000000000E+00 0.320929801553103966E-01 0.000000000000000000E+00 -0.666666666666666970E-02 -0.666666666666666970E-02 -0.435014924404768016E-03 -0.435084930031662005E-03 -0.138286695016626008E-03 -0.140229499898713992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.255502626954418992E-01 0.000000000000000000E+00 -0.161423359539924989E-03 -0.161424132835986993E-03 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166666666666667011E-01 -0.435069382752795012E-03 -0.435069401582733977E-03 0.000000000000000000E+00 0.333333333333332982E-01 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.420929804179970002E-01 0.000000000000000000E+00 -0.435014924505136027E-03 -0.435084929926964016E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.138286695554652988E-03 -0.140229499360686009E-03 -0.166666666666667011E-01 -0.166666666666667011E-01 0.133333333333332995E-01 -0.666666666666666970E-02 -0.666666666666666970E-02 0.000000000000000000E+00 0.249646302547502988E-01 0.000000000000000000E+00 -0.161423118848985994E-03 -0.161424373526925988E-03 -0.666666666666666970E-02 -0.139258097697407009E-03 -0.139258097217931988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.319837881362066007E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.161423315571430996E-03 -0.161424176804481013E-03 -0.666666666666666970E-02 -0.666666666666666970E-02 
-0.435069384823893025E-03 -0.435069399464952007E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.333333333333332982E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 0.253926391078274985E-01 -0.435042938363147018E-03 -0.435055716900843974E-03 -0.666666666666666970E-02 -0.138910376047635993E-03 -0.139605818867703004E-03 -0.166666666666667011E-01 -0.666666666666666970E-02 -0.162064605978737010E-03 0.755721724764775969E-02 -0.421583401256607965E-11 -0.164615581083446994E-03 -0.622132993821362964E-09 0.822717713000382066E-02 -0.666666666666666970E-02 -0.435031787535767993E-03 0.921658221859024927E-02 -0.671472362675333990E-03 0.000000000000000000E+00 -0.435066598341386974E-03 -0.622856281165874953E-03 0.926582425818990064E-02 -0.666666666666666970E-02 -0.435031698814498024E-03 0.921598015968093029E-02 -0.671350185242260036E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066687046999979E-03 0.000000000000000000E+00 -0.622803871309166967E-03 0.926515487581706064E-02 -0.666666666666666970E-02 -0.162091523387943991E-03 0.755721896647259996E-02 -0.540935128117279988E-11 0.000000000000000000E+00 -0.164750534006632987E-03 -0.795366002646085000E-09 0.822717800583096931E-02 0.926452029956657992E-02 -0.622281045706827999E-03 -0.666666666666666970E-02 -0.668721208315983004E-03 0.920448404025630065E-02 -0.666666666666666970E-02 0.920291930713102065E-02 -0.668399798797767049E-03 -0.622140084769083981E-03 0.926273190908652976E-02 -0.666666666666666970E-02 -0.162061623289820004E-03 0.755721877319791985E-02 -0.410171929572062999E-11 -0.164601757794207001E-03 -0.605571814693378962E-09 0.822717790541416943E-02 0.921717278330758984E-02 -0.671606351121000053E-03 -0.666666666666666970E-02 -0.622958188273658011E-03 0.926596681290394082E-02 -0.666666666666666970E-02 -0.164300067974464987E-03 0.822730729925378983E-02 -0.253113151843113024E-09 
-0.164103118034321003E-03 -0.251546774270035010E-09 0.822722715999001965E-02 -0.666666666666666970E-02 -0.435040487769458974E-03 0.921687604564088055E-02 -0.671540363061452956E-03 0.000000000000000000E+00 -0.435057899246222006E-03 -0.622872037532872002E-03 0.926585755069535971E-02 -0.666666666666666970E-02 -0.162127209687436009E-03 0.759121330666192966E-02 -0.381352367097723024E-11 0.000000000000000000E+00 -0.164582493974596989E-03 -0.545377294793035977E-09 0.822717728074968957E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319287245753013E-03 0.755721788134880966E-02 -0.125543672150161994E-10 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854288422184003E-03 0.000000000000000000E+00 -0.695251112369320969E-09 0.759121397932040025E-02 -0.666666666666666970E-02 0.921698284063145017E-02 -0.671564905209266956E-03 -0.622877569603531964E-03 0.926586949940542970E-02 -0.666666666666666970E-02 -0.435044292403726006E-03 0.921676136558105021E-02 -0.671305578260984965E-03 -0.435054094871408024E-03 -0.622625142915718996E-03 0.926561885584748987E-02 -0.164408450418608012E-03 0.822722728633254984E-02 -0.379408966293651983E-09 -0.162009019530446013E-03 -0.666666666666666970E-02 -0.254284697644517994E-11 0.755732846813902005E-02 -0.162062503890727003E-03 -0.666666666666666970E-02 0.755732483871392972E-02 -0.412630295389444991E-11 -0.164604272365608011E-03 -0.609067369792561007E-09 0.822717730203972007E-02 -0.162008946283214003E-03 -0.666666666666666970E-02 0.755728526608244033E-02 -0.254314505624484981E-11 -0.164408477352361010E-03 -0.379466146432811988E-09 0.822722728638950081E-02 -0.666666666666666970E-02 -0.162061922095729002E-03 0.755721774336119961E-02 -0.411297420986845978E-11 -0.164602266437615996E-03 -0.607177113853009997E-09 0.822717717450133050E-02 0.926588728390210017E-02 -0.622885394141277989E-03 -0.666666666666666970E-02 -0.671602423890051950E-03 0.921714903947036014E-02 -0.435048603885989987E-03 -0.666666666666666970E-02 
0.921700338564301024E-02 -0.671444778088914037E-03 -0.435049783507797990E-03 -0.622737470874805995E-03 0.926573446347610068E-02 -0.666666666666666970E-02 0.897119442365579972E-02 -0.606814776285979950E-03 -0.607757345476577036E-03 0.923427126759777082E-02 0.921865858272334984E-02 -0.671454242057297965E-03 -0.666666666666666970E-02 -0.646212455869751953E-03 0.929466342228658926E-02 -0.192798491760539997E-03 -0.666666666666666970E-02 0.825745086852652972E-02 -0.932628952743546979E-06 -0.167240534534016013E-03 -0.225014212014958993E-07 0.822685297950877942E-02 -0.666666666666666970E-02 -0.435814163537570013E-03 0.921887387061852068E-02 -0.575307467536130958E-03 -0.434281293408232021E-03 -0.604501016235682955E-03 0.915698677472209961E-02 -0.413213566066566995E-03 -0.666666666666666970E-02 0.877723361706403069E-02 -0.541754211244169052E-03 0.000000000000000000E+00 -0.454683602340957979E-03 -0.590990555173378953E-03 0.920776881757097952E-02 -0.666666666666666970E-02 -0.163098401897450001E-03 0.763192105293324010E-02 -0.471912653584295006E-04 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.530165759527253025E-03 0.000000000000000000E+00 -0.266005332113615023E-03 0.861802401723344921E-02 -0.666666666666666970E-02 -0.162004211770428006E-03 0.757730699043844026E-02 -0.176948923798146998E-05 -0.529438965832964951E-03 -0.405267405870187984E-04 0.875391888040484047E-02 0.921979764348120068E-02 -0.602143221168287973E-03 -0.666666666666666970E-02 -0.555336710991042974E-03 0.881585220388428001E-02 -0.666666666666666970E-02 -0.163010868628506010E-03 0.795402855233807993E-02 -0.273140602330698999E-11 -0.164270868470497012E-03 -0.213417929117084997E-09 0.822716826170043040E-02 -0.666666666666666970E-02 0.927693305369737056E-02 -0.632604553292874993E-03 -0.671545581861832000E-03 0.921776054175638994E-02 -0.161978982193291990E-03 -0.666666666666666970E-02 0.755685732173456970E-02 -0.195386460922786995E-11 -0.164191969605661007E-03 
-0.291699488577104992E-09 0.822713227252660009E-02 -0.666666666666666970E-02 -0.162064640844275011E-03 0.755721703602540030E-02 -0.421716748215653003E-11 -0.164615755229752009E-03 -0.622326809900769984E-09 0.822717689294136939E-02 -0.666666666666666970E-02 -0.435030941620315019E-03 0.921655770481275005E-02 -0.671470405959126958E-03 0.000000000000000000E+00 -0.435067444105677011E-03 -0.622859268229516961E-03 0.926582520767522955E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435032188115217974E-03 0.921571954136004949E-02 -0.671094905936150000E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066197830839016E-03 0.000000000000000000E+00 -0.622560079517476997E-03 0.926486481761856079E-02 -0.666666666666666970E-02 -0.162178699975160996E-03 0.755732514584614001E-02 -0.123678884662636997E-10 -0.165233346068301005E-03 -0.180156095420544003E-08 0.822721902239778938E-02 0.925859295317845929E-02 -0.620261598377330964E-03 -0.666666666666666970E-02 -0.659675714565077006E-03 0.916581375466305956E-02 -0.666666666666666970E-02 0.818856060767081048E-02 -0.736710799666523000E-11 -0.221521744718127990E-09 0.822698054922356946E-02 -0.666666666666666970E-02 -0.162061886876159011E-03 0.755721683506239972E-02 -0.411153398251473010E-11 -0.164602034973986998E-03 -0.606965384602901960E-09 0.822717599389061020E-02 -0.435048835805744005E-03 0.921717024009354943E-02 -0.671607913788692021E-03 -0.435049551589144980E-03 -0.666666666666666970E-02 -0.622896101942852008E-03 0.926589988923041952E-02 -0.666666666666666970E-02 -0.164092201396150991E-03 0.822715599945273920E-02 -0.245104005328256975E-09 -0.164092160478588006E-03 -0.245036767609787999E-09 0.822714877827463061E-02 -0.666666666666666970E-02 -0.435040451026806000E-03 0.921687587838842040E-02 -0.671541321966330983E-03 -0.435057935985662001E-03 -0.622873182117880046E-03 0.926585851990125060E-02 -0.666666666666666970E-02 -0.162127502447745005E-03 0.759121254289110009E-02 -0.382274259639387968E-11 
0.000000000000000000E+00 -0.164583524873151011E-03 -0.546666909348633031E-09 0.822717637948596967E-02 -0.666666666666666970E-02 -0.166319284889783996E-03 0.755721738061322038E-02 -0.125530862526258001E-10 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854283136274001E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.695179911733615969E-09 0.759121329234466961E-02 -0.435040521890532996E-03 -0.666666666666666970E-02 0.921687815467167083E-02 -0.671542181838102967E-03 -0.435057865128120000E-03 -0.622873658606718991E-03 0.926585863114167026E-02 -0.666666666666666970E-02 -0.435041288920689020E-03 0.921669954253361012E-02 -0.671349556748095989E-03 -0.435057098161677983E-03 -0.622686448593169997E-03 0.926564688510513046E-02 -0.164325652029393012E-03 0.822714890084452928E-02 -0.369068713238530013E-09 -0.162006572449318004E-03 -0.666666666666666970E-02 -0.248169403822924009E-11 0.755729660596458994E-02 -0.162062728183659998E-03 -0.666666666666666970E-02 0.755729867674227035E-02 -0.413683822802849013E-11 -0.164605417097992009E-03 -0.610604701856152959E-09 0.822717650154041955E-02 -0.162006472670696007E-03 -0.666666666666666970E-02 0.755723750063842965E-02 -0.248209213801367997E-11 -0.164325689607119994E-03 -0.369144828512267008E-09 0.822714890092034017E-02 -0.666666666666666970E-02 -0.162062060860401010E-03 0.755721735591260990E-02 -0.411820697467498986E-11 -0.164602902125366996E-03 -0.607936291979959974E-09 0.822717685023799036E-02 -0.435051421420119008E-03 0.926588186737323932E-02 -0.622883850263154012E-03 -0.435046965950540009E-03 -0.666666666666666970E-02 -0.671591030490054954E-03 0.921709452945720931E-02 -0.435046965633857023E-03 -0.666666666666666970E-02 0.921703017052688013E-02 -0.671537494669817957E-03 -0.435051421736794026E-03 -0.622834247686287991E-03 0.926581357500960018E-02 -0.666666666666666970E-02 0.755832397448806970E-02 -0.411138722941705998E-11 -0.609957135264493987E-09 0.822725821738184981E-02 
0.926024553873549920E-02 -0.621022213891748000E-03 -0.666666666666666970E-02 -0.663176099841847051E-03 0.918049665304914927E-02 -0.433952552704485026E-03 -0.666666666666666970E-02 0.917940522365195966E-02 -0.660850447221769969E-03 -0.436139822938484006E-03 -0.618641376898239033E-03 0.925939891361087994E-02 -0.666666666666666970E-02 -0.436004520088601995E-03 0.917698869910051063E-02 -0.534972138649924970E-03 -0.434089279817291974E-03 -0.560727539174508006E-03 0.911474373371438072E-02 -0.405342384042105025E-03 -0.666666666666666970E-02 0.868224382189996917E-02 -0.501678846320934967E-03 0.000000000000000000E+00 -0.460809060945838991E-03 -0.586502661037227048E-03 0.920011158722021036E-02 -0.666666666666666970E-02 -0.163211684871365992E-03 0.764753570908803006E-02 -0.433463713660776003E-04 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.529849870720847950E-03 0.000000000000000000E+00 -0.242696464819676003E-03 0.855013113167163961E-02 -0.666666666666666970E-02 -0.162715770416370988E-03 0.761282345029158040E-02 -0.385507239315511005E-04 -0.531931290617636970E-03 -0.289327756212754013E-03 0.896268459992746042E-02 0.822708378408086929E-02 -0.453015989481950975E-09 -0.666666666666666970E-02 -0.344818166136396003E-11 0.765885647535816984E-02 -0.666666666666666970E-02 -0.162981032279518011E-03 0.765876435591864024E-02 -0.850978247264382040E-09 -0.167181094192769006E-03 -0.102217899652555996E-06 0.822724871883005922E-02 -0.666666666666666970E-02 0.899524486155203924E-02 -0.614036210565926987E-03 -0.609553123871605014E-03 0.923729250745260015E-02 -0.162146754878560000E-03 -0.666666666666666970E-02 0.755888395212829002E-02 -0.877868214680283015E-11 -0.165048028543372008E-03 -0.128278104037022006E-08 0.822721063460546062E-02 -0.666666666666666970E-02 -0.435042623242022023E-03 0.921694787133059065E-02 -0.671556689557351017E-03 -0.435055763937149977E-03 -0.622875549102152042E-03 0.926586538966900006E-02 -0.666666666666666970E-02 
-0.435049192464642004E-03 0.921716877978374927E-02 -0.671606871920753961E-03 0.000000000000000000E+00 -0.435049194930888991E-03 -0.622886313955485970E-03 0.926588938212210070E-02 -0.666666666666666970E-02 -0.162062558982797991E-03 0.755721781611762029E-02 -0.413711697147502992E-11 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605380154927001E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.610686417053202014E-09 0.822717710018053966E-02 -0.666666666666666970E-02 -0.435040485643464010E-03 0.921687647065221917E-02 -0.671540876850887998E-03 -0.435057901372032005E-03 -0.622872543909229947E-03 0.926585805993978932E-02 0.822717712936564018E-02 -0.606929319768994989E-09 -0.666666666666666970E-02 -0.411128314368316016E-11 0.755721765744643992E-02 -0.666666666666666970E-02 0.755721765744643992E-02 -0.411128317078820962E-11 -0.606929319768994989E-09 0.822717712936564018E-02 -0.666666666666666970E-02 -0.162062002168695000E-03 0.755721765497851042E-02 -0.411600421982957979E-11 -0.164602608742393997E-03 -0.607615952871632013E-09 0.822717713082168033E-02 -0.435050788864285991E-03 0.926588513557140975E-02 -0.622885206966810982E-03 -0.435047598518492997E-03 -0.666666666666666970E-02 -0.671596264858268996E-03 0.921711667933667982E-02 -0.666666666666666970E-02 -0.435047598512343999E-03 0.921711525903850040E-02 -0.671594794936314007E-03 -0.435050788870434014E-03 -0.622883798838939949E-03 0.926588365351022937E-02 -0.666666666666666970E-02 -0.162064603060552002E-03 0.755721765252307005E-02 -0.421568902488327991E-11 -0.164615562620811005E-03 -0.622111732773898991E-09 0.822717714449052936E-02 -0.666666666666666970E-02 -0.435031767332582026E-03 0.921658397831942068E-02 -0.671474831069555976E-03 0.000000000000000000E+00 -0.435066618541047982E-03 -0.622858773502414035E-03 0.926582672366347941E-02 -0.666666666666666970E-02 -0.435031759872446977E-03 0.921598447326531928E-02 -0.671353605549905000E-03 0.000000000000000000E+00 -0.666666666666666970E-02 
0.000000000000000000E+00 -0.435066625999740012E-03 0.000000000000000000E+00 -0.622806880419023005E-03 0.926515741228120006E-02 -0.435040467082004024E-03 -0.666666666666666970E-02 0.921687727787984939E-02 -0.671542217009017000E-03 -0.435057919931870025E-03 -0.622873933298062990E-03 0.926585948649214994E-02 -0.666666666666666970E-02 -0.162062558212559002E-03 0.755721765246759013E-02 -0.413710611008680003E-11 -0.164605378163800012E-03 -0.610684955957268983E-09 0.822717713313388978E-02 -0.435057895396635016E-03 0.926585809258491030E-02 -0.622872563956516044E-03 -0.435040491619382013E-03 -0.666666666666666970E-02 -0.671540933200040989E-03 0.921687668189401059E-02 -0.164358965526126012E-03 -0.666666666666666970E-02 0.822694399085341019E-02 -0.400851057692599981E-09 -0.164360419602091007E-03 -0.404402306244331983E-09 0.822717693055982982E-02 -0.435040480593904986E-03 -0.666666666666666970E-02 0.921687631186799035E-02 -0.671540849649739004E-03 -0.435057906421149976E-03 -0.622872546527165980E-03 0.926585805293679045E-02 -0.666666666666666970E-02 -0.162062559343999999E-03 0.755721765385561003E-02 -0.413714921327754003E-11 -0.164605383792317005E-03 -0.610691224557087966E-09 0.822717713418924003E-02 -0.435057910263422993E-03 0.926585952122008921E-02 -0.622873948580168974E-03 -0.435040476751295975E-03 -0.666666666666666970E-02 -0.671542290274572949E-03 0.921687760235279964E-02 -0.164358958283310994E-03 -0.666666666666666970E-02 0.822694359327636945E-02 -0.400841676414764018E-09 -0.164360412981448008E-03 -0.404398890304639026E-09 0.822717692927778937E-02 -0.666666666666666970E-02 0.755721765237515972E-02 -0.411128322752053976E-11 -0.606929330558722046E-09 0.822717712936565058E-02 0.822717692676106990E-02 -0.401916224886799020E-09 -0.666666666666666970E-02 -0.398503993611501997E-09 0.822695176179768020E-02 -0.435040782183989977E-03 -0.666666666666666970E-02 0.921688643492079955E-02 -0.671543136482274949E-03 -0.435057604856947981E-03 -0.622873024631925972E-03 
0.926585913880650924E-02 -0.666666666666666970E-02 -0.162064603067295007E-03 0.755721765304565030E-02 -0.421568935096809032E-11 -0.164615562649187992E-03 -0.622111764150747044E-09 0.822717714448892995E-02 -0.435031767264953018E-03 -0.666666666666666970E-02 0.921658397617671973E-02 -0.671474830688970005E-03 0.000000000000000000E+00 -0.435066618608665010E-03 -0.622858773521975970E-03 0.926582672355137985E-02 -0.666666666666666970E-02 -0.435031759867127014E-03 0.921598447384829045E-02 -0.671353608399908978E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066626005059000E-03 0.000000000000000000E+00 -0.622806883450133957E-03 0.926515741287571928E-02 -0.666666666666666970E-02 -0.162062535484642990E-03 0.755721765241379028E-02 -0.413624149795890984E-11 -0.164605264966733003E-03 -0.610559194893120966E-09 0.822717713298093054E-02 0.822717692676106990E-02 -0.401916224778066025E-09 -0.666666666666666970E-02 -0.398503993617073007E-09 0.822695176179807051E-02 -0.666666666666666970E-02 -0.435040782184004994E-03 0.921688643492129048E-02 -0.671543136482392043E-03 -0.435057604856933019E-03 -0.622873024631955029E-03 0.926585913880656996E-02 -0.666666666666666970E-02 0.755721765237515018E-02 -0.411128322752061973E-11 -0.606929330558734970E-09 0.822717712936565058E-02 -0.162062535484639006E-03 -0.666666666666666970E-02 0.755721765241101993E-02 -0.413624149803294008E-11 -0.164605264966739996E-03 -0.610559194905281038E-09 0.822717713298093054E-02 -0.666666666666666970E-02 -0.435040978876939991E-03 0.921689266842156026E-02 -0.671544215875962010E-03 -0.435057408180386984E-03 -0.622872937769437958E-03 0.926585946462473060E-02 -0.666666666666666970E-02 -0.162127387652292998E-03 0.759121301093484014E-02 -0.381917589201341967E-11 0.000000000000000000E+00 -0.164583145431396004E-03 -0.546168929922635014E-09 0.822717717091626943E-02 -0.666666666666666970E-02 -0.166319270517048003E-03 0.755721782869738998E-02 -0.125468645423563995E-10 0.000000000000000000E+00 
-0.666666666666666970E-02 0.000000000000000000E+00 -0.166854266113373008E-03 0.000000000000000000E+00 -0.694835327004200008E-09 0.759121369277350033E-02 -0.666666666666666970E-02 -0.162091523336998009E-03 0.755721898664039968E-02 -0.540934653228686967E-11 -0.164750533613684997E-03 -0.795365292654328000E-09 0.822717800588296938E-02 0.926452030366763969E-02 -0.622281047524610035E-03 -0.666666666666666970E-02 -0.668721216930222967E-03 0.920448407782870071E-02 -0.666666666666666970E-02 0.920291934460982022E-02 -0.668399807408691956E-03 -0.622140086592986989E-03 0.926273191317288080E-02 -0.666666666666666970E-02 -0.435042278634080015E-03 0.921682356015443051E-02 -0.671431390315878037E-03 -0.435056108521798986E-03 -0.622756696995610017E-03 0.926574695980873948E-02 -0.164604656267662011E-03 0.822717725537036940E-02 -0.609640569558637034E-09 -0.162062479874256008E-03 -0.666666666666666970E-02 -0.413006082275806018E-11 0.755726759085299012E-02 -0.666666666666666970E-02 -0.162006162226947987E-03 0.755727139217683957E-02 -0.248123277233144982E-11 -0.164398193103688998E-03 -0.370429925449825014E-09 0.822722962912562916E-02 -0.666666666666666970E-02 -0.162064599546255997E-03 0.755721765602699984E-02 -0.421555271070431014E-11 -0.164615545095071005E-03 -0.622091901724106049E-09 0.822717714627387020E-02 -0.666666666666666970E-02 -0.435031818763031985E-03 0.921658104872763971E-02 -0.671470214801736043E-03 0.000000000000000000E+00 -0.435066567119566977E-03 -0.622854034507517988E-03 0.926582206155602932E-02 -0.666666666666666970E-02 -0.435031752461292008E-03 0.921598570305218016E-02 -0.671353593598150960E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066633409600010E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.622806719992169021E-03 0.926515905834468002E-02 -0.162062393641141989E-03 -0.666666666666666970E-02 0.755722114123831036E-02 -0.413058273432428028E-11 -0.164604687174786012E-03 -0.609739837378995011E-09 
0.822717725546923996E-02 -0.666666666666666970E-02 -0.162060731985959994E-03 0.755721979203459980E-02 -0.406827649196786028E-11 -0.164598252833557000E-03 -0.600736958190943012E-09 0.822717859850461038E-02 -0.164089877343614012E-03 0.822722950591434077E-02 -0.245723982778697014E-09 -0.164740360816590010E-03 -0.666666666666666970E-02 -0.253032241614393982E-09 0.822760867484073968E-02 -0.435063218164074010E-03 -0.666666666666666970E-02 0.926625279535532066E-02 -0.623222898205732980E-03 -0.435035168245856982E-03 -0.671596115310440014E-03 0.921718087666401065E-02 -0.162006191893170997E-03 -0.666666666666666970E-02 0.755728897650832006E-02 -0.248111426872892018E-11 -0.164398182178321996E-03 -0.370407207277356981E-09 0.822722962910301010E-02 -0.666666666666666970E-02 -0.162062408593516993E-03 0.755721778752637980E-02 -0.413141814596678035E-11 -0.164604734980543005E-03 -0.609861061511916968E-09 0.822717721123181027E-02 -0.164359758083610009E-03 0.822717705188663086E-02 -0.403734886259138006E-09 -0.164358443024191010E-03 -0.666666666666666970E-02 -0.400953600054809976E-09 0.822699461577980004E-02 -0.435042369262490986E-03 -0.666666666666666970E-02 0.921678605590701971E-02 -0.671388823849048961E-03 -0.435056017899627977E-03 -0.622715238127973967E-03 0.926570506586106915E-02 -0.666666666666666970E-02 0.897119442339463016E-02 -0.606814776206859971E-03 -0.607757345455318975E-03 0.923427126754335081E-02 0.921865858631707932E-02 -0.671454241685535990E-03 -0.666666666666666970E-02 -0.646212508082324977E-03 0.929466349621914031E-02 -0.192798550395760997E-03 -0.666666666666666970E-02 0.825745095197305025E-02 -0.932632147975482013E-06 -0.167240534661016013E-03 -0.225014932097670997E-07 0.822685297960725967E-02 -0.666666666666666970E-02 -0.435853333051191975E-03 0.921982672935077983E-02 -0.576029289720921011E-03 -0.434241817036211018E-03 -0.604491260294178975E-03 0.915701687929672964E-02 -0.413213785008022022E-03 -0.666666666666666970E-02 0.877723644056978974E-02 
-0.541755128362865035E-03 -0.454683425554907011E-03 -0.590990666065798007E-03 0.920776909864313986E-02 -0.666666666666666970E-02 -0.163098438357505001E-03 0.763192860819322978E-02 -0.471912218750692030E-04 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.530165770547247946E-03 -0.266001580374522018E-03 0.861802239077459967E-02 -0.666666666666666970E-02 -0.162004212070115012E-03 0.757730725874164977E-02 -0.176948864124370000E-05 -0.529438965863178955E-03 -0.405266342026540003E-04 0.875391878258710045E-02 0.921979766533967057E-02 -0.602143233936809023E-03 -0.666666666666666970E-02 -0.555336769390324045E-03 0.881585236637343062E-02 -0.666666666666666970E-02 -0.163010868918803012E-03 0.795402864518520922E-02 -0.273140627481144992E-11 -0.164270868404786012E-03 -0.213417891562764991E-09 0.822716826171294990E-02 -0.666666666666666970E-02 0.927693305629184034E-02 -0.632604555455441994E-03 -0.671545581847466972E-03 0.921776054189130979E-02 -0.161978982117798992E-03 -0.666666666666666970E-02 0.755685740523829967E-02 -0.195386057816982017E-11 -0.164191968508161013E-03 -0.291698868024367013E-09 0.822713227244221967E-02 -0.666666666666666970E-02 -0.435040481081572998E-03 0.921687625100787999E-02 -0.671540765653113052E-03 -0.435057905933523977E-03 -0.622872462287793992E-03 0.926585797472436953E-02 -0.666666666666666970E-02 -0.162127507324803994E-03 0.759121282163092038E-02 -0.382298621466253031E-11 -0.164583632044819009E-03 -0.546703997412867024E-09 0.822717713283360047E-02 -0.666666666666666970E-02 -0.166319256859904011E-03 0.755721769623335978E-02 -0.125414820597383008E-10 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854250172961013E-03 -0.694537633219151044E-09 0.759121350560284982E-02 -0.666666666666666970E-02 -0.162099247952961000E-03 0.755721989572244977E-02 -0.581368808752400979E-11 -0.164789516502383002E-03 -0.853932691235368040E-09 0.822717846311052034E-02 0.926237298878906934E-02 
-0.621979743485002014E-03 -0.666666666666666970E-02 -0.667643958464268013E-03 0.919963618647812002E-02 -0.666666666666666970E-02 0.920119115376864978E-02 -0.667964676147511953E-03 -0.622121227338204037E-03 0.926416149864764914E-02 -0.666666666666666970E-02 -0.435040909817440986E-03 0.921687655493176952E-02 -0.671527187062299024E-03 -0.435057477234175008E-03 -0.622856729377207043E-03 0.926584500443678079E-02 -0.164605266534459990E-03 0.822717720592583933E-02 -0.610450247156119009E-09 -0.162062530200907004E-03 -0.666666666666666970E-02 -0.413549431266714970E-11 0.755722446015559979E-02 -0.666666666666666970E-02 -0.162059365323539989E-03 0.755722567800456990E-02 -0.401936380178025961E-11 -0.164612123412715989E-03 -0.594277800995966983E-09 0.822719391360227960E-02 -0.666666666666666970E-02 -0.162064601680787996E-03 0.755721765533357015E-02 -0.421563549128826973E-11 0.000000000000000000E+00 -0.164615555733905002E-03 -0.622103957379088996E-09 0.822717714640426936E-02 -0.666666666666666970E-02 -0.435031792976918995E-03 0.921658339535963086E-02 -0.671473400089229013E-03 0.000000000000000000E+00 -0.435066592901186004E-03 -0.622857239963683028E-03 0.926582531946548021E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031751019518008E-03 0.921598490375117978E-02 -0.671353543894939037E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066634851121988E-03 0.000000000000000000E+00 -0.622806775506295953E-03 0.926515820468288079E-02 -0.162062519267639990E-03 -0.666666666666666970E-02 0.755721857194846958E-02 -0.413556062572659997E-11 -0.164605270453123994E-03 -0.610462848248556989E-09 0.822717720593840046E-02 -0.666666666666666970E-02 -0.162062243310156996E-03 0.755721835822449010E-02 -0.412518743024167011E-11 -0.164604700854507999E-03 -0.608980634819725018E-09 0.822717779878246944E-02 -0.164369437450745994E-03 0.822719371500816969E-02 -0.393313770364309007E-09 -0.164344328728707013E-03 -0.666666666666666970E-02 -0.391103618326530975E-09 
0.822706248393468946E-02 -0.435044237783256996E-03 -0.666666666666666970E-02 0.921700053762436987E-02 -0.671567082152385998E-03 -0.435054149489181002E-03 -0.622876302108100002E-03 0.926586962058313993E-02 -0.162059379398541000E-03 -0.666666666666666970E-02 0.755723329728289969E-02 -0.401928043513828996E-11 -0.164612118388109011E-03 -0.594261933530200978E-09 0.822719391358646933E-02 -0.666666666666666970E-02 -0.162062495727046009E-03 0.755721772845492981E-02 -0.413473254943709991E-11 -0.164605146051557987E-03 -0.610342338260452991E-09 0.822717719432444067E-02 -0.164360315553491003E-03 0.822717700213156967E-02 -0.404232600081692006E-09 -0.164358887835959998E-03 -0.666666666666666970E-02 -0.400997831179622018E-09 0.822696488974702066E-02 -0.435041264130012022E-03 -0.666666666666666970E-02 0.921687154445821957E-02 -0.671511051258173990E-03 -0.435057122950393985E-03 -0.622839088072463958E-03 0.926582874613315031E-02 -0.666666666666666970E-02 0.927696108771089954E-02 -0.632627917074633003E-03 -0.671545420285009982E-03 0.921776198086651961E-02 0.922026584017879985E-02 -0.602408843182564049E-03 -0.666666666666666970E-02 -0.556545745722415003E-03 0.881922185436606956E-02 -0.163017068240428012E-03 -0.666666666666666970E-02 0.795594799175567031E-02 -0.273445423603751999E-11 -0.164269850461446013E-03 -0.212776812668013003E-09 0.822716866999695026E-02 -0.666666666666666970E-02 -0.435073828677985024E-03 0.926998697001005928E-02 -0.626775693155938018E-03 -0.435024555678502995E-03 -0.674658125777386950E-03 0.922087193677081918E-02 -0.317388619046037024E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.807555110066160951E-02 -0.161231565052261995E-03 -0.506658534832258992E-03 -0.311775039000433988E-03 0.895372823636290081E-02 -0.666666666666666970E-02 -0.163033789441224992E-03 0.757712252412545001E-02 -0.230784432560672012E-04 0.000000000000000000E+00 -0.666666666666666970E-02 -0.528851957602917964E-03 -0.189448307782970002E-03 0.832713693082491081E-02 
-0.666666666666666970E-02 -0.161988418356935989E-03 0.755983351837515982E-02 -0.201738757027985980E-11 -0.164212922403823006E-03 -0.300296756336277975E-09 0.822710828992056012E-02 0.921859794240553060E-02 -0.671461758481382994E-03 -0.666666666666666970E-02 -0.645287749704886960E-03 0.929336169606828980E-02 -0.666666666666666970E-02 -0.192231218611419992E-03 0.825469369225220045E-02 -0.211366001320515989E-06 -0.166616012215181007E-03 -0.134857476054616004E-07 0.822777129012250072E-02 -0.666666666666666970E-02 0.902089049785161946E-02 -0.621512175397749989E-03 -0.611398199404824985E-03 0.924071304541716938E-02 -0.162013975253551008E-03 -0.666666666666666970E-02 0.758489435794543979E-02 -0.176583046930321999E-05 -0.529428370426132016E-03 -0.381593418223961990E-04 0.875176249380833077E-02 -0.666666666666666970E-02 -0.435040249760507993E-03 0.921686856770915011E-02 -0.671539094886240001E-03 -0.435058137234120004E-03 -0.622872175240221974E-03 0.926585722647491929E-02 -0.666666666666666970E-02 -0.435049192418392975E-03 0.921716878281565039E-02 -0.671606876372485046E-03 -0.435049194977138020E-03 -0.622886318490127947E-03 0.926588938673533064E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.162062559002504992E-03 0.755721765529914023E-02 -0.413713063586915014E-11 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605381839701988E-03 -0.610688496265456990E-09 0.822717709903768996E-02 -0.666666666666666970E-02 -0.435040481539034974E-03 0.921687633332718068E-02 -0.671540846145312980E-03 -0.435057905476102008E-03 -0.622872537796382957E-03 0.926585804562602040E-02 0.822717712936564018E-02 -0.606929320140830976E-09 -0.666666666666666970E-02 -0.411128314549578026E-11 0.755721765728442022E-02 -0.666666666666666970E-02 0.755721765728442022E-02 -0.411128314549578026E-11 -0.606929320113726025E-09 0.822717712936564018E-02 -0.666666666666666970E-02 -0.162062535444855996E-03 0.755721765299004027E-02 -0.413623988345109963E-11 -0.164605264762881988E-03 
-0.610558967689581019E-09 0.822717713298422998E-02 -0.164360411104607013E-03 0.822717692934940049E-02 -0.404392079821727997E-09 -0.164359007307861987E-03 -0.666666666666666970E-02 -0.400959182056933016E-09 0.822695177830758996E-02 -0.666666666666666970E-02 -0.435040782704709025E-03 0.921688645230588945E-02 -0.671543140332089947E-03 -0.435057604336271976E-03 -0.622873025362695042E-03 0.926585914058404916E-02 -0.666666666666666970E-02 -0.435040480198235974E-03 0.921687629980992920E-02 -0.671540848018962978E-03 0.000000000000000000E+00 -0.435057906816784022E-03 -0.622872547225220984E-03 0.926585805277986042E-02 -0.666666666666666970E-02 -0.435049192309392005E-03 0.921716878048172046E-02 -0.671606876991212012E-03 -0.435049195086140020E-03 -0.622886319711294994E-03 0.926588938772130062E-02 -0.666666666666666970E-02 -0.162062559108483992E-03 0.755721765240897035E-02 -0.413713455792400007E-11 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605382375200988E-03 -0.610689046115110989E-09 0.822717709593154961E-02 -0.162062559033916009E-03 -0.666666666666666970E-02 0.755721765255952006E-02 -0.413713742434565006E-11 -0.164605382254266990E-03 -0.610689502144610012E-09 0.822717713322326967E-02 -0.666666666666666970E-02 -0.162062001821161990E-03 0.755721765238182019E-02 -0.411599099559748981E-11 -0.164602607031998988E-03 -0.607614053906671031E-09 0.822717713004775947E-02 -0.435050784143273998E-03 0.926588367156185033E-02 -0.622883807548416991E-03 -0.435047603239580017E-03 -0.666666666666666970E-02 -0.671594832030014948E-03 0.921711541879277965E-02 -0.435047603239578987E-03 -0.666666666666666970E-02 0.921711541854514961E-02 -0.671594831758394982E-03 -0.435050784143274974E-03 -0.622883807286240968E-03 0.926588367130479033E-02 -0.435040480204222993E-03 -0.666666666666666970E-02 0.921687629993508950E-02 -0.671540847984813971E-03 -0.435057906810797979E-03 -0.622872547158363982E-03 0.926585805272238036E-02 -0.666666666666666970E-02 
-0.435040480199119979E-03 0.921687628837800955E-02 -0.671540836043143011E-03 -0.435057906815900994E-03 -0.622872535731676044E-03 0.926585804082549064E-02 -0.164605382251114998E-03 0.822717713322325926E-02 -0.610689492032270982E-09 -0.162062559042711003E-03 -0.666666666666666970E-02 -0.413713737102918003E-11 0.755721765729555021E-02 -0.162062559115393992E-03 -0.666666666666666970E-02 0.755721765728750976E-02 -0.413714006582654996E-11 -0.164605382612441987E-03 -0.610689891716875000E-09 0.822717713311152052E-02 -0.666666666666666970E-02 0.755721765237515972E-02 -0.411128320041545960E-11 -0.606929330558716048E-09 0.822717712936565058E-02 0.822717692676106990E-02 -0.401916224778854018E-09 -0.666666666666666970E-02 -0.398503993603185006E-09 0.822695176179710080E-02 -0.435040782183968022E-03 -0.666666666666666970E-02 0.921688643492006056E-02 -0.671543136482093996E-03 -0.435057604856969990E-03 -0.622873024631876967E-03 0.926585913880641036E-02 -0.666666666666666970E-02 -0.162064603069931001E-03 0.755721765252316026E-02 -0.421568944254191005E-11 -0.164615562667500004E-03 -0.622111785613204007E-09 0.822717714448713971E-02 -0.435031767249101982E-03 -0.666666666666666970E-02 0.921658397565039943E-02 -0.671474830574549054E-03 0.000000000000000000E+00 -0.435066618624513010E-03 -0.622858773502365029E-03 0.926582672349992968E-02 -0.666666666666666970E-02 -0.435031759837616007E-03 0.921598447232674020E-02 -0.671353608078640034E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066626034565020E-03 0.000000000000000000E+00 -0.622806883368089994E-03 0.926515741217365935E-02 -0.666666666666666970E-02 -0.162062535484635997E-03 0.755721765240871968E-02 -0.413624136259383020E-11 -0.164605264966749998E-03 -0.610559194918970050E-09 0.822717713298093921E-02 0.822717692676106990E-02 -0.401916224724624988E-09 -0.666666666666666970E-02 -0.398503993603535989E-09 0.822695176179711989E-02 -0.666666666666666970E-02 -0.435040782183968998E-03 
0.921688643492009005E-02 -0.671543136482101043E-03 -0.435057604856969014E-03 -0.622873024631878051E-03 0.926585913880642077E-02 -0.666666666666666970E-02 0.755721765237515972E-02 -0.411128320041545960E-11 -0.606929330558716048E-09 0.822717712936565058E-02 -0.162062535484635997E-03 -0.666666666666666970E-02 0.755721765240869973E-02 -0.413624144391172994E-11 -0.164605264966749998E-03 -0.610559194919383020E-09 0.822717713298093921E-02 -0.666666666666666970E-02 -0.435033689603804975E-03 0.921378384069738987E-02 -0.668589820131555040E-03 -0.435064696586663006E-03 -0.620092661077489989E-03 0.926283488586463963E-02 -0.666666666666666970E-02 -0.162162554053133000E-03 0.761060627573421043E-02 -0.358738548769622997E-11 -0.164556192142978992E-03 -0.502148070051966022E-09 0.822717405269838935E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.163092637467958997E-03 0.755846712970139993E-02 0.813151629364127964E-19 -0.666666666666666970E-02 0.000000000000000000E+00 -0.163377020705577990E-03 -0.197563428050820995E-37 0.761068726943637999E-02 -0.666666666666666970E-02 -0.415446954693720976E-03 0.879694625554016015E-02 -0.528338181427255972E-03 -0.452858816762892013E-03 -0.571958066866902980E-03 0.919346567847542998E-02 0.822717710541285077E-02 -0.582878441461362035E-09 -0.666666666666666970E-02 -0.398790879900396976E-11 0.756907608051521959E-02 -0.666666666666666970E-02 0.756911094403798970E-02 -0.399682676269469016E-11 -0.586791272997618962E-09 0.822723940155752065E-02 -0.666666666666666970E-02 -0.435082932504862979E-03 0.926680195170886956E-02 -0.623730586218653013E-03 -0.435015449187736995E-03 -0.671618387476765970E-03 0.921723408637379997E-02 -0.164605497784444998E-03 0.822717715380158997E-02 -0.610801293217642997E-09 -0.162062563985841001E-03 -0.666666666666666970E-02 -0.413787738555715978E-11 0.755721095967978960E-02 -0.666666666666666970E-02 -0.162067963173172003E-03 0.755721116190670970E-02 -0.434923998820925017E-11 -0.164635999950897013E-03 
-0.641653733272013955E-09 0.822717987065326069E-02 -0.666666666666666970E-02 -0.162064602794511991E-03 0.755721765349726994E-02 -0.421567878592677010E-11 0.000000000000000000E+00 -0.164615561292109994E-03 -0.622110240351067001E-09 0.822717714537648040E-02 -0.666666666666666970E-02 -0.435031773851333981E-03 0.921658402777584054E-02 -0.671474653364973048E-03 0.000000000000000000E+00 -0.435066612023434006E-03 -0.622858559544564984E-03 0.926582657482898954E-02 -0.666666666666666970E-02 -0.435031756377946010E-03 0.921598454249483029E-02 -0.671353578907302054E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066629493629977E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.622806849901958001E-03 0.926515761104865047E-02 -0.162062576485603999E-03 -0.666666666666666970E-02 0.755721769097717974E-02 -0.413780156463713995E-11 -0.164605493304321993E-03 -0.610786879472733959E-09 0.822717715378724034E-02 -0.666666666666666970E-02 -0.162062600946185996E-03 0.755721775647281990E-02 -0.413875277158846027E-11 -0.164605835862048996E-03 -0.610932451422013981E-09 0.822717731789045920E-02 -0.164390239995456007E-03 0.822717965629857044E-02 -0.424752159962523975E-09 -0.164384780603813011E-03 -0.666666666666666970E-02 -0.420718048853076981E-09 0.822693037781527942E-02 -0.435039866684628978E-03 -0.666666666666666970E-02 0.921685570981070970E-02 -0.671536106102198996E-03 -0.435058520274921989E-03 -0.622871476563668946E-03 0.926585585423385025E-02 -0.162067974342572992E-03 -0.666666666666666970E-02 0.755721712453114990E-02 -0.434916937807187029E-11 -0.164635995970298013E-03 -0.641640314610242035E-09 0.822717987063988944E-02 -0.666666666666666970E-02 -0.162062530993410003E-03 0.755721767391838006E-02 -0.413607275117757979E-11 -0.164605272998311010E-03 -0.610535653474081026E-09 0.822717715805352037E-02 -0.164360523631990990E-03 0.822717694987937058E-02 -0.404455003494009021E-09 -0.164359098316623007E-03 
-0.666666666666666970E-02 -0.401040888608453987E-09 0.822695309202677028E-02 -0.435040829062186994E-03 -0.666666666666666970E-02 0.921689067116016926E-02 -0.671545174080842035E-03 -0.435057557982693015E-03 -0.622874572247597981E-03 0.926586218088495950E-02 -0.666666666666666970E-02 0.755944258225673023E-02 -0.409900186798689021E-11 -0.607550069944511006E-09 0.822725761878751946E-02 0.822717683107231958E-02 -0.307429333767333980E-09 -0.666666666666666970E-02 -0.346590492659272013E-11 0.788944072959344919E-02 -0.405969476545236976E-03 -0.666666666666666970E-02 0.868864828240666076E-02 -0.503334320458691947E-03 -0.460337026543280981E-03 -0.585683663662835011E-03 0.919951403822613992E-02 -0.666666666666666970E-02 -0.434971941726958005E-03 0.922397396166772972E-02 -0.680859150263666982E-03 -0.435126415795954993E-03 -0.632212694429038955E-03 0.927545373694507042E-02 -0.163258163012422003E-03 -0.666666666666666970E-02 0.771427725269166996E-02 -0.477633450518533990E-04 -0.532374012608134001E-03 -0.264816289609958973E-03 0.893589131240556979E-02 -0.666666666666666970E-02 -0.163022374738535991E-03 0.756010628736956004E-02 -0.985505903625681948E-05 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.528465266513926044E-03 -0.112888812204124995E-03 0.818271031310010069E-02 0.000000000000000000E+00 -0.666666666666666970E-02 -0.162166578301091007E-03 0.755933919789102040E-02 -0.104753730850025000E-10 -0.165099058739546009E-03 -0.152246080895752994E-08 0.822717141726839012E-02 0.822711899108566000E-02 -0.221645141852008993E-09 -0.666666666666666970E-02 -0.803318716673082971E-11 0.819386805739684074E-02 -0.666666666666666970E-02 -0.433725918784746999E-03 0.917430859668704086E-02 -0.661288978702212048E-03 -0.436363722694927986E-03 -0.620322134736719043E-03 0.926073953711441071E-02 -0.666666666666666970E-02 0.755741928796660033E-02 -0.410903028695346997E-11 -0.606500685244875008E-09 0.822717712893872993E-02 -0.162225051571071008E-03 
-0.666666666666666970E-02 0.762801677615301994E-02 -0.369132365187440996E-05 -0.529778141305529949E-03 -0.883337217346735949E-04 0.880271406258762920E-02 -0.666666666666666970E-02 -0.415930173645343991E-03 0.874527436890885045E-02 -0.345835963857271027E-03 -0.452435882453698006E-03 -0.346466114750530014E-03 0.898436350079646028E-02 -0.666666666666666970E-02 -0.410333749350875977E-03 0.874024429470159937E-02 -0.526324780402007012E-03 -0.456977206374832004E-03 -0.594710214126934963E-03 0.921019309223168083E-02 -0.666666666666666970E-02 -0.349138390596867001E-03 0.808745258250311017E-02 -0.174389744426073004E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.493489741678000975E-03 -0.295632081822985011E-03 0.859314073796063962E-02 -0.666666666666666970E-02 -0.162165456144072008E-03 0.755894484280849981E-02 -0.104756642479310002E-10 -0.165141511878991989E-03 -0.152686212523292010E-08 0.822721248571258931E-02 0.925936813944002075E-02 -0.620622043504368980E-03 -0.666666666666666970E-02 -0.661330297689515954E-03 0.917272030438071040E-02 -0.666666666666666970E-02 0.917272030438071040E-02 -0.661330297689515954E-03 -0.620622043504368980E-03 0.925936813944002075E-02 -0.666666666666666970E-02 -0.162062370509243004E-03 0.755772622350076983E-02 -0.408856756901033998E-11 -0.164599404158889001E-03 -0.603371041254018047E-09 0.822717649469902924E-02 -0.164363072970947994E-03 0.822717536870335060E-02 -0.406417442325602006E-09 -0.164592544349163996E-03 -0.666666666666666970E-02 -0.411459868924217992E-09 0.822733545578738061E-02 -0.666666666666666970E-02 -0.435055225953795985E-03 0.926605052142811039E-02 -0.623035876737523049E-03 -0.435043161259378005E-03 -0.671607266693034040E-03 0.921717853066967956E-02 -0.666666666666666970E-02 -0.435040481662372024E-03 0.921687634507689993E-02 -0.671540853956346050E-03 -0.435057905352776017E-03 -0.622872544450323051E-03 0.926585805410279004E-02 -0.666666666666666970E-02 -0.435049192197708014E-03 
0.921716876912447025E-02 -0.671606868732131960E-03 0.000000000000000000E+00 -0.435049195197822981E-03 -0.622886312493042956E-03 0.926588937934206061E-02 -0.666666666666666970E-02 -0.162062574467009994E-03 0.755721765477525027E-02 -0.413771902340413000E-11 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605458858507004E-03 0.000000000000000000E+00 -0.610774061235578027E-09 0.822717709793101964E-02 -0.162063129702431002E-03 -0.666666666666666970E-02 0.755721488286742026E-02 -0.415889664939935991E-11 -0.164608241410742993E-03 -0.613854083280822974E-09 0.822717557368093065E-02 -0.666666666666666970E-02 -0.162062006116684993E-03 0.755721765538701958E-02 -0.411615339006451024E-11 -0.164602628392850010E-03 -0.607637651237361961E-09 0.822717712956365019E-02 -0.435050838802116988E-03 0.926588470716894000E-02 -0.622885079077197044E-03 -0.435047548579851017E-03 -0.666666666666666970E-02 -0.671595813711089052E-03 0.921711477741266050E-02 -0.435047548601973024E-03 -0.666666666666666970E-02 0.921711980798223078E-02 -0.671600947327989010E-03 -0.435050838779994981E-03 -0.622889987523941044E-03 0.926588996280537994E-02 -0.435040278978450999E-03 -0.666666666666666970E-02 0.921687073978887973E-02 -0.671540718908447947E-03 -0.435058108018793015E-03 -0.622873586970733978E-03 0.926585856104042080E-02 -0.666666666666666970E-02 -0.435040282159379990E-03 0.921688118588532938E-02 -0.671551578142270001E-03 -0.435058104838147977E-03 -0.622883989557962013E-03 0.926586935714399963E-02 -0.164608242952759995E-03 0.822717557368590063E-02 -0.613859067359255033E-09 -0.162063125398168002E-03 -0.666666666666666970E-02 -0.415892283159718001E-11 0.755721256705853967E-02 -0.162062565682641010E-03 -0.666666666666666970E-02 0.755721268194402968E-02 -0.413779732258981993E-11 -0.164605464633444011E-03 -0.610787973952621006E-09 0.822717713109540061E-02 -0.666666666666666970E-02 0.895231589357397932E-02 -0.601006644779317027E-03 -0.606288347917921001E-03 
0.923179444181716075E-02 0.921580902907083964E-02 -0.671118539167890951E-03 -0.666666666666666970E-02 -0.628092210114751027E-03 0.927168309130125927E-02 -0.174566225284372993E-03 -0.666666666666666970E-02 0.823503971979330958E-02 -0.510771887614783954E-06 -0.167453071907815989E-03 -0.287656487025054002E-06 0.822679160087593994E-02 -0.666666666666666970E-02 -0.162064999238019001E-03 0.755721705065465991E-02 -0.423106066889392027E-11 -0.164617538202307007E-03 -0.624346471180137971E-09 0.822717661766741992E-02 -0.435027297636500021E-03 -0.666666666666666970E-02 0.921651161171017051E-02 -0.671532969891762031E-03 0.000000000000000000E+00 -0.435071087356345989E-03 -0.622941366988572030E-03 0.926589066343610004E-02 -0.666666666666666970E-02 -0.435031386261265976E-03 0.921594142114651009E-02 -0.671460160302111021E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066999544915002E-03 0.000000000000000000E+00 -0.622929217552493962E-03 0.926511198239901926E-02 -0.666666666666666970E-02 -0.162052976426746003E-03 0.755715649452497022E-02 -0.378464898874789026E-11 -0.164557847455290991E-03 -0.559380698263932047E-09 0.822711615636790086E-02 0.921569960061689036E-02 -0.671199549999528054E-03 -0.666666666666666970E-02 -0.624992481787264051E-03 0.926820615335867057E-02 -0.666666666666666970E-02 -0.170162248239514002E-03 0.823042259709222922E-02 -0.875293298501571007E-07 -0.166825903639511989E-03 -0.692727623674652971E-07 0.822728043870269918E-02 -0.666666666666666970E-02 0.900314235021752035E-02 -0.616359389498942999E-03 -0.610140243508493002E-03 0.923851413586198981E-02 -0.162042399166736006E-03 -0.666666666666666970E-02 0.755717693384080035E-02 -0.343592701208587996E-11 -0.164505035953650008E-03 -0.508562496748712960E-09 0.822712657547092045E-02 -0.666666666666666970E-02 -0.162064604852803001E-03 0.755721682250679039E-02 -0.421581532487358014E-11 -0.164615579255980999E-03 -0.622130465298954038E-09 0.822717706389368073E-02 
-0.666666666666666970E-02 -0.435031888747001994E-03 0.921662497939955937E-02 -0.671514002290806984E-03 0.000000000000000000E+00 -0.435066497147766000E-03 -0.622895617170409984E-03 0.926586574231100073E-02 -0.666666666666666970E-02 -0.435031628074990023E-03 0.921600144320258025E-02 -0.671374321788007020E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066757774081993E-03 0.000000000000000000E+00 -0.622827422523968997E-03 0.926517931888306938E-02 -0.666666666666666970E-02 -0.162168008296603987E-03 0.755720830543264981E-02 -0.111631538525960998E-10 -0.165130579439813012E-03 -0.162383789215979997E-08 0.822716472453222970E-02 0.822711943021576034E-02 -0.222017847039514990E-09 -0.666666666666666970E-02 -0.777760925671697071E-11 0.819196919955125953E-02 -0.666666666666666970E-02 0.917235354741821086E-02 -0.661205826288643948E-03 -0.620672923073943951E-03 0.926095692434543948E-02 -0.666666666666666970E-02 -0.162079935652635995E-03 0.755720153609774035E-02 -0.485738294134179040E-11 -0.164692040195761001E-03 -0.715311464965949002E-09 0.822716985670068034E-02 -0.166685978125188992E-03 0.822673928904943050E-02 -0.488041135101126002E-07 -0.166618503841219014E-03 -0.666666666666666970E-02 -0.388518789845462021E-07 0.822064815916740028E-02 -0.666666666666666970E-02 -0.434820802690177985E-03 0.921114685377608967E-02 -0.671312500692984007E-03 -0.435277323473812975E-03 -0.623870937176576037E-03 0.926677062396401990E-02 -0.666666666666666970E-02 -0.435040416088372018E-03 0.921688078845643056E-02 -0.671547258120006041E-03 -0.435057970921028016E-03 -0.622879055329303975E-03 0.926586475383493063E-02 -0.666666666666666970E-02 -0.435048641715767004E-03 0.921713990189115991E-02 -0.671590634431848974E-03 0.000000000000000000E+00 -0.435049745678237000E-03 -0.622873878413765037E-03 0.926587662260439961E-02 -0.666666666666666970E-02 -0.162065760605982007E-03 0.755721473567340981E-02 -0.425851668132086027E-11 0.000000000000000000E+00 
-0.666666666666666970E-02 0.000000000000000000E+00 -0.164621246760972002E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.628327256337155983E-09 0.822716093119939956E-02 -0.162542321360015994E-03 -0.666666666666666970E-02 0.755650259353967020E-02 -0.562981055991819022E-09 -0.167001540638854007E-03 -0.773289585904606956E-07 0.822676761660280989E-02 -0.666666666666666970E-02 -0.162061328166368990E-03 0.755722295014580030E-02 -0.409106119013757001E-11 -0.164607216616360989E-03 -0.604245617915833026E-09 0.822718306150760012E-02 -0.435044697597018011E-03 0.921886702873469036E-02 -0.673104082698351962E-03 -0.435053689697235003E-03 -0.666666666666666970E-02 -0.624556261940502019E-03 0.926763947624762059E-02 -0.435053689776730007E-03 -0.666666666666666970E-02 0.926765139731208082E-02 -0.624573254433248028E-03 -0.435044697517509996E-03 -0.673283560783819986E-03 0.921873769877605033E-02 -0.435024190531306987E-03 -0.666666666666666970E-02 0.921794328133558952E-02 -0.672853471668093963E-03 -0.435074193732175026E-03 -0.624365177556240967E-03 0.926744434610436013E-02 -0.666666666666666970E-02 -0.435000466897472022E-03 0.921805866981871037E-02 -0.673973626915920021E-03 -0.435097908603312988E-03 -0.625447361535428985E-03 0.926834572012657024E-02 -0.167001904506701996E-03 0.822676773783739997E-02 -0.774505155958715954E-07 -0.162540904682313003E-03 -0.666666666666666970E-02 -0.563656255578501956E-09 0.755607546000543022E-02 -0.162061504581954011E-03 -0.666666666666666970E-02 0.755611035298445026E-02 -0.418942890780224988E-11 -0.164621967289905991E-03 -0.619216517642183046E-09 0.822718518765969999E-02 -0.666666666666666970E-02 0.920452787793652957E-02 -0.668731214720833010E-03 -0.622283241864520008E-03 0.926452697307078056E-02 0.822724025612167967E-02 -0.586193188243385978E-09 -0.666666666666666970E-02 -0.399370806297400039E-11 0.756944692405215041E-02 -0.162114939468458987E-03 -0.666666666666666970E-02 0.756941303972038983E-02 
-0.521957340938646036E-11 -0.164756390961434999E-03 -0.760317731950115969E-09 0.822719049518791032E-02 -0.666666666666666970E-02 -0.162348256591775996E-03 0.757138537095577036E-02 -0.154279395379191983E-04 -0.530720557527397988E-03 -0.221706573191253990E-03 0.891607556388816086E-02 -0.163035589715460993E-03 -0.666666666666666970E-02 0.791125444579478081E-02 -0.568881226622524034E-05 0.000000000000000000E+00 -0.530385305835545987E-03 -0.644753151430826047E-04 0.878526723619102966E-02 -0.666666666666666970E-02 -0.399599558422803001E-03 0.843930969858660944E-02 -0.399745648940732980E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.464884917361518986E-03 0.000000000000000000E+00 -0.476898399198092998E-03 0.874546494059007078E-02 -0.666666666666666970E-02 -0.162063070389469993E-03 0.755351606433557997E-02 -0.447630187954819978E-11 -0.164644944937428005E-03 -0.661962104712968044E-09 0.822717544598733017E-02 0.926551780113546962E-02 -0.622722840997360001E-03 -0.666666666666666970E-02 -0.670824268645125988E-03 0.921370889062123923E-02 -0.666666666666666970E-02 -0.434945178559459974E-03 0.921154593743686925E-02 -0.668579923758682053E-03 -0.435153154613506974E-03 -0.620572207517385986E-03 0.926326406971371939E-02 -0.666666666666666970E-02 0.755816621993720978E-02 -0.411326355534032026E-11 -0.610348502456840988E-09 0.822725905587259020E-02 -0.402874184979866987E-03 -0.666666666666666970E-02 0.865106066171478054E-02 -0.472609065736268020E-03 -0.462635081887802994E-03 -0.557606013013843028E-03 0.917391259078658046E-02 -0.666666666666666970E-02 -0.162816160491244000E-03 0.755629729934226019E-02 -0.145267309791605992E-07 -0.168362009612906010E-03 -0.190258327748242995E-05 0.822792910350438923E-02 -0.666666666666666970E-02 -0.434980287177094997E-03 0.921591582004819936E-02 -0.672220539599956038E-03 0.000000000000000000E+00 -0.435118076428732989E-03 -0.623871208911141008E-03 0.926674480491154057E-02 -0.666666666666666970E-02 
-0.391663109206836005E-03 0.857592364748300043E-02 -0.270377370496600986E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.470345934717641013E-03 0.000000000000000000E+00 -0.290023568937984979E-03 0.893922648721857081E-02 -0.666666666666666970E-02 -0.317994853290403008E-03 0.807402136257530031E-02 -0.163000017527141987E-03 -0.506466269749422031E-03 -0.315285081405852004E-03 0.895741238458845970E-02 0.822721524635505914E-02 -0.564092130899272960E-09 -0.666666666666666970E-02 -0.389036333446228008E-11 0.758050134032894028E-02 -0.666666666666666970E-02 0.758050134032894028E-02 -0.389036333446228008E-11 -0.564092130736643047E-09 0.822721524635505914E-02 -0.666666666666666970E-02 -0.162062220957438010E-03 0.755675930940021001E-02 -0.416185255233357985E-11 -0.164608241679856012E-03 -0.614515052693167971E-09 0.822717669744772034E-02 -0.435064264203551974E-03 0.926583708706145946E-02 -0.622864427576335982E-03 -0.435034122053574979E-03 -0.666666666666666970E-02 -0.671494672063580045E-03 0.921666478777227044E-02 -0.666666666666666970E-02 -0.435034122194007025E-03 0.921666834178432037E-02 -0.671498215917801949E-03 -0.435064264063141017E-03 -0.622867805184998033E-03 0.926584080673045936E-02 -0.666666666666666970E-02 -0.162064603057306009E-03 0.755721765249671960E-02 -0.421568895545965981E-11 -0.164615562604910991E-03 -0.622111714812958048E-09 0.822717714449142969E-02 -0.666666666666666970E-02 -0.435031767250997005E-03 0.921658395801952003E-02 -0.671474812281343020E-03 0.000000000000000000E+00 -0.435066618622617987E-03 -0.622858755966087044E-03 0.926582670501814067E-02 -0.666666666666666970E-02 -0.435031759994269993E-03 0.921598448039129964E-02 -0.671353611750878003E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066625877937976E-03 0.000000000000000000E+00 -0.622806885942570987E-03 0.926515741571650939E-02 -0.435040457277539980E-03 -0.666666666666666970E-02 0.921687744869944051E-02 -0.671542694130693044E-03 
-0.435057929735475977E-03 -0.622874450455320979E-03 0.926585997023653997E-02 -0.666666666666666970E-02 -0.162062556267884004E-03 0.755721766013062960E-02 -0.413703217741733986E-11 -0.164605370008190008E-03 -0.610674234287949039E-09 0.822717713670985916E-02 -0.435057871804128975E-03 0.926586389852728995E-02 -0.622877919476854986E-03 -0.435040515213944027E-03 -0.666666666666666970E-02 -0.671546676413121014E-03 0.921688294468172957E-02 -0.164358973103396013E-03 -0.666666666666666970E-02 0.822694462574627937E-02 -0.400863023298765998E-09 -0.164360422527890990E-03 -0.404404639852252990E-09 0.822717693008181983E-02 -0.435040521994138000E-03 -0.666666666666666970E-02 0.921688317220892978E-02 -0.671546727788281000E-03 -0.435057865024523994E-03 -0.622877930192895020E-03 0.926586392288021934E-02 -0.666666666666666970E-02 -0.162062559830136006E-03 0.755721765142735018E-02 -0.413716779390587020E-11 -0.164605386233707005E-03 -0.610693931490910976E-09 0.822717713361387042E-02 -0.435057916250486973E-03 0.926586001868622000E-02 -0.622874471770934967E-03 -0.435040470763708000E-03 -0.666666666666666970E-02 -0.671542796319704962E-03 0.921687790126752074E-02 -0.164358941319385992E-03 -0.666666666666666970E-02 0.822694343451954926E-02 -0.400827305234235980E-09 -0.164360402068052007E-03 -0.404386932680776984E-09 0.822717693276291996E-02 -0.666666666666666970E-02 0.755853528834142963E-02 -0.410907237024926035E-11 -0.609517161481542976E-09 0.822725835180459011E-02 0.822699835769964000E-02 -0.229769146362661995E-09 -0.666666666666666970E-02 -0.552503881837150022E-11 0.815277410258053933E-02 -0.431800076739517009E-03 -0.666666666666666970E-02 0.911683250971320920E-02 -0.645107775821999975E-03 -0.438245882118603019E-03 -0.614761191522381957E-03 0.925173111512482028E-02 -0.666666666666666970E-02 -0.162066807230739002E-03 0.755721537399490963E-02 -0.430232877656178001E-11 -0.164626562646872987E-03 -0.634707547989976979E-09 0.822717705663869939E-02 -0.435011184600756027E-03 
-0.666666666666666970E-02 0.921541718649479015E-02 -0.670832821913695961E-03 0.000000000000000000E+00 -0.435087195554581009E-03 -0.622361407627594967E-03 0.926525297165626920E-02 -0.666666666666666970E-02 -0.435024023596373988E-03 0.921463311000128051E-02 -0.670757573686644971E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435074360623453004E-03 0.000000000000000000E+00 -0.622355873651302019E-03 0.926393658659479917E-02 -0.666666666666666970E-02 -0.162274431703276004E-03 0.755738308037023996E-02 -0.318241339520051988E-10 -0.165730950021604001E-03 -0.457847648567586989E-08 0.822724281267087944E-02 0.822699408685815918E-02 -0.228425571690641000E-09 -0.666666666666666970E-02 -0.569261988097333975E-11 0.815841787771899064E-02 -0.666666666666666970E-02 -0.432079853778089007E-03 0.912452759982078018E-02 -0.647134253482418054E-03 -0.437974705900714988E-03 -0.615294574958598047E-03 0.925273449425248994E-02 -0.666666666666666970E-02 0.755849436233489019E-02 -0.410955655495479020E-11 -0.609618023537665964E-09 0.822725856237870035E-02 -0.162292777295443990E-03 -0.666666666666666970E-02 0.755742123691062993E-02 -0.382880604592767993E-10 -0.165829805992421012E-03 -0.549637808741221009E-08 0.822725133924320957E-02 -0.666666666666666970E-02 -0.162519834506453002E-03 0.758488071339646994E-02 -0.277296855958790984E-04 -0.531411092685664019E-03 -0.271305840096227018E-03 0.895289465200115042E-02 -0.666666666666666970E-02 -0.435144739153746991E-03 0.927049386352079077E-02 -0.627195195351904020E-03 0.000000000000000000E+00 -0.434953602544600011E-03 -0.673646030614466983E-03 0.921927161130219920E-02 -0.666666666666666970E-02 -0.162365832482225009E-03 0.770469472296904986E-02 -0.341374365570676015E-05 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.530093834704794035E-03 0.000000000000000000E+00 -0.670683663828369948E-04 0.878401698585971023E-02 -0.666666666666666970E-02 -0.406469353648676005E-03 0.868845378029185940E-02 
-0.487445867647356014E-03 -0.459955485443886000E-03 -0.561455537392793009E-03 0.917876821487357934E-02 0.822724019224464927E-02 -0.586236000786575972E-09 -0.666666666666666970E-02 -0.399393159268467017E-11 0.756942275692288041E-02 -0.666666666666666970E-02 0.756938783566538965E-02 -0.398490203650706992E-11 -0.582276441424578042E-09 0.822717710481313952E-02 -0.666666666666666970E-02 -0.162064188321971013E-03 0.755638508819920975E-02 -0.426955668367828011E-11 -0.164621776697154991E-03 -0.630372643058242999E-09 0.822717559459556005E-02 -0.435097665976917993E-03 0.926770146147764921E-02 -0.624846565957563015E-03 -0.435000709641257001E-03 -0.666666666666666970E-02 -0.673347123713348948E-03 0.921745137370448032E-02 -0.666666666666666970E-02 -0.164221437092461006E-03 0.822590307325241077E-02 -0.301540502789892983E-09 -0.164276463598209013E-03 -0.317267646894687990E-09 0.822720896113244074E-02 -0.666666666666666970E-02 -0.435040484785290994E-03 0.921687621618137026E-02 -0.671540621958410973E-03 -0.435057902230129995E-03 -0.622872302682165040E-03 0.926585782249825934E-02 -0.666666666666666970E-02 -0.162127341684403998E-03 0.759121275229191969E-02 -0.381774204087762988E-11 0.000000000000000000E+00 -0.164583013632055004E-03 -0.545969305874372025E-09 0.822717722380156974E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319316569640000E-03 0.755721776450155036E-02 -0.125637202975271002E-10 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854313971363013E-03 0.000000000000000000E+00 -0.695766372129872957E-09 0.759121342784058965E-02 -0.435007341079701976E-03 -0.666666666666666970E-02 0.921767423502365020E-02 -0.673397510122010013E-03 -0.435091037539719026E-03 -0.624857046911454035E-03 0.926772529096031930E-02 -0.666666666666666970E-02 -0.435042780427774989E-03 0.921682237061306926E-02 -0.671414330503463958E-03 -0.435055606761622013E-03 -0.622737221386204017E-03 0.926573013279845085E-02 -0.164512437178604990E-03 0.822720912015355993E-02 
-0.478141451497207985E-09 -0.162035132712287004E-03 -0.666666666666666970E-02 -0.321987108752259996E-11 0.755727832217654039E-02 -0.162065357686432991E-03 -0.666666666666666970E-02 0.755727582940853970E-02 -0.423979675954428002E-11 -0.164618725863945991E-03 -0.625584793967118990E-09 0.822717479197860972E-02 -0.162035093479730998E-03 -0.666666666666666970E-02 0.755725620859361031E-02 -0.322006451958004010E-11 -0.164512451389381995E-03 -0.478178423500335003E-09 0.822720912019037943E-02 -0.666666666666666970E-02 -0.162063731980656992E-03 0.755721533834744982E-02 -0.418199541718481972E-11 -0.164611237647245009E-03 -0.617212851732219973E-09 0.822717573520458968E-02 -0.435072205240009014E-03 0.926779361315624960E-02 -0.624886867601484965E-03 -0.435026179501724010E-03 -0.666666666666666970E-02 -0.673540811814344947E-03 0.921830845677890955E-02 -0.435026047901024976E-03 -0.666666666666666970E-02 0.921631021320170989E-02 -0.671342130465355002E-03 -0.435072336809685012E-03 -0.622763518226994005E-03 0.926572086598713082E-02 -0.666666666666666970E-02 0.755741246850622026E-02 -0.410910644194978984E-11 -0.606515170668247041E-09 0.822717712895315936E-02 0.822711877443555936E-02 -0.222069534738639993E-09 -0.666666666666666970E-02 -0.774169806703398991E-11 0.819168186272310056E-02 -0.433633789741900023E-03 -0.666666666666666970E-02 0.917151071300197074E-02 -0.660635211420097053E-03 -0.436454594164531018E-03 -0.620192759941486029E-03 0.926043761605101039E-02 -0.666666666666666970E-02 -0.435400642028349984E-03 0.924569487161715914E-02 -0.602675681802749023E-03 -0.434697126892320004E-03 -0.641341627123005026E-03 0.919049697819046031E-02 -0.417766802147069983E-03 -0.666666666666666970E-02 0.884119022200588987E-02 -0.565196699593361013E-03 0.000000000000000000E+00 -0.450926780658111002E-03 -0.597865634330250052E-03 0.921780464690032976E-02 -0.666666666666666970E-02 -0.162927680290126010E-03 0.761083369281832008E-02 -0.416350633852703977E-04 0.000000000000000000E+00 
0.000000000000000000E+00 -0.666666666666666970E-02 -0.530230645664085047E-03 0.000000000000000000E+00 -0.269827347276391011E-03 0.866009136742803935E-02 -0.666666666666666970E-02 -0.162144345304456006E-03 0.760092331739828028E-02 -0.320930758282627016E-05 -0.529813749405098054E-03 -0.838256824281321020E-04 0.879776792041750931E-02 0.822717682684740922E-02 -0.303187285772573992E-09 -0.666666666666666970E-02 -0.349301427021032004E-11 0.790044609360178972E-02 -0.666666666666666970E-02 -0.407938480218889010E-03 0.871084858846607989E-02 -0.512295469801531947E-03 -0.458837927071481999E-03 -0.588085060255285046E-03 0.920289209649226975E-02 -0.666666666666666970E-02 0.755933306797651038E-02 -0.410021674909237023E-11 -0.607789044576712979E-09 0.822725774217296021E-02 -0.162167689058960993E-03 -0.666666666666666970E-02 0.755666327531443957E-02 -0.112741283803980995E-10 -0.165135297900302988E-03 -0.164049635527082004E-08 0.822716577111987958E-02 -0.666666666666666970E-02 -0.415926985437479015E-03 0.874524524800943982E-02 -0.345826974275356008E-03 -0.452438516787343978E-03 -0.346461802416189973E-03 0.898436011623418053E-02 -0.666666666666666970E-02 -0.410332540159175974E-03 0.874022409518679030E-02 -0.526301953402972958E-03 0.000000000000000000E+00 -0.456978154604879979E-03 -0.594686220254121993E-03 0.921017399119665979E-02 -0.666666666666666970E-02 -0.349138358539420027E-03 0.808745335846144074E-02 -0.174389979817568010E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.493489760022879976E-03 0.000000000000000000E+00 -0.295632770892417004E-03 0.859314451744194047E-02 -0.666666666666666970E-02 -0.162165456200227012E-03 0.755894485257972964E-02 -0.104756674728628994E-10 -0.165141511934384990E-03 -0.152686256191205990E-08 0.822721248562563977E-02 0.925936813792594023E-02 -0.620622042811125991E-03 -0.666666666666666970E-02 -0.661330294502400018E-03 0.917272029101801056E-02 -0.666666666666666970E-02 0.917272029101801056E-02 -0.661330294502400018E-03 
-0.620622042811125991E-03 0.925936813792594023E-02 -0.666666666666666970E-02 -0.162059451012688989E-03 0.755773564896643039E-02 -0.398147961980706018E-11 -0.164599233536076003E-03 -0.588248966932235027E-09 0.822718777261857021E-02 -0.435010710626149014E-03 0.921784913653543939E-02 -0.671757497579554964E-03 -0.435087669354308976E-03 -0.666666666666666970E-02 -0.624205044887739973E-03 0.926731518984568077E-02 -0.666666666666666970E-02 -0.435087680804149997E-03 0.926689880496680933E-02 -0.623818561754816976E-03 -0.435010699170602000E-03 -0.671588999254513967E-03 0.921721221696823051E-02 -0.666666666666666970E-02 -0.162064554135907989E-03 0.755721778431040023E-02 -0.421379742453608961E-11 -0.164615465489623012E-03 -0.621841597575447999E-09 0.822717725634031921E-02 -0.666666666666666970E-02 -0.435032270858094002E-03 0.921659999696208994E-02 -0.671477624444696982E-03 0.000000000000000000E+00 -0.435066115102212007E-03 -0.622858575415140996E-03 0.926582763556333018E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031835174660986E-03 0.921598350730943976E-02 -0.671332923411385946E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066550710658023E-03 0.000000000000000000E+00 -0.622784369281448010E-03 0.926515556236343074E-02 -0.435081584415368018E-03 -0.666666666666666970E-02 0.926715509609260003E-02 -0.624058431079940984E-03 -0.435016797724696992E-03 -0.671759302322477020E-03 0.921784300407063953E-02 -0.666666666666666970E-02 -0.162062416434052993E-03 0.755721809516292017E-02 -0.413174197731702039E-11 -0.164605215334601997E-03 -0.609922581957833024E-09 0.822717754034535051E-02 -0.435056252509799003E-03 0.926585458068619923E-02 -0.622864166386205044E-03 -0.435042134635998002E-03 -0.666666666666666970E-02 -0.671542142824929958E-03 0.921692265814959971E-02 -0.164341185001132012E-03 -0.666666666666666970E-02 0.822700548023182036E-02 -0.388254230807434987E-09 -0.164366486837535988E-03 -0.391288606812102024E-09 0.822719359654751001E-02 
-0.435043745553257018E-03 -0.666666666666666970E-02 0.921697677898357035E-02 -0.671554360684184981E-03 -0.435054641693514987E-03 -0.622866717131421977E-03 0.926586042794482964E-02 -0.666666666666666970E-02 -0.162059863500320009E-03 0.755722860146506008E-02 -0.403707215866304025E-11 -0.164610171594745992E-03 -0.596715205847239044E-09 0.822719066601751078E-02 -0.435029181351461001E-03 0.921782667546000058E-02 -0.671761723962046014E-03 -0.435069204038055987E-03 -0.666666666666666970E-02 -0.623757152955008967E-03 0.926682695577680030E-02 -0.165176244427055993E-03 -0.666666666666666970E-02 0.822774267939753966E-02 -0.421637432761214996E-09 -0.164358900259774010E-03 -0.403642926186002997E-09 0.822717766237675979E-02 -0.666666666666666970E-02 0.895231589389712014E-02 -0.601006644879792970E-03 -0.606288347943506004E-03 0.923179444186011944E-02 0.921580902981316945E-02 -0.671118538607027963E-03 -0.666666666666666970E-02 -0.628092231229955013E-03 0.927168311539479026E-02 -0.174566249107139995E-03 -0.666666666666666970E-02 0.823503974641245050E-02 -0.510772714070998049E-06 -0.167453071908487002E-03 -0.287656386989065009E-06 0.822679160061134084E-02 -0.666666666666666970E-02 -0.162064983519178004E-03 0.755722213005034001E-02 -0.423003049664173980E-11 -0.164617409769547002E-03 -0.624194129888606984E-09 0.822717667703955040E-02 -0.435027350434486978E-03 -0.666666666666666970E-02 0.921651333906866981E-02 -0.671533317952461976E-03 0.000000000000000000E+00 -0.435071034569932023E-03 -0.622941399756069946E-03 0.926589080870137034E-02 -0.666666666666666970E-02 -0.435031629619492017E-03 0.921595126475713983E-02 -0.671462224228685047E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066756229832998E-03 0.000000000000000000E+00 -0.622929617617789958E-03 0.926511475629528992E-02 -0.666666666666666970E-02 -0.162052976330494004E-03 0.755715655404278971E-02 -0.378464136913797002E-11 -0.164557846396870007E-03 -0.559379553656162034E-09 
0.822711615671048967E-02 0.921569960267614070E-02 -0.671199548462878990E-03 -0.666666666666666970E-02 -0.624992541148623954E-03 0.926820621881743051E-02 -0.666666666666666970E-02 -0.170162317751058011E-03 0.823042266464544918E-02 -0.875297413337334996E-07 -0.166825903611445003E-03 -0.692727437231052063E-07 0.822728043872133011E-02 -0.666666666666666970E-02 0.900314235197411002E-02 -0.616359390013905962E-03 -0.610140243636164963E-03 0.923851413610003031E-02 -0.162042399128667987E-03 -0.666666666666666970E-02 0.755717695498461014E-02 -0.343592447834058983E-11 -0.164505035561990999E-03 -0.508562114575286970E-09 0.822712657561797990E-02 -0.666666666666666970E-02 -0.162064626628578005E-03 0.755722018474085013E-02 -0.421639430439463977E-11 -0.164615674182357004E-03 -0.622213619794602018E-09 0.822717716142255980E-02 -0.666666666666666970E-02 -0.435031360634292993E-03 0.921661780771376032E-02 -0.671522125948738010E-03 0.000000000000000000E+00 -0.435067025167480974E-03 -0.622906555198169992E-03 0.926587475408661987E-02 -0.666666666666666970E-02 -0.435031920197820993E-03 0.921597580576540010E-02 -0.671351984885895973E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066465702252001E-03 0.000000000000000000E+00 -0.622805921827540012E-03 0.926514231021871061E-02 -0.666666666666666970E-02 -0.162062001713370989E-03 0.755721764997798959E-02 -0.411598727890175012E-11 -0.164602606520337013E-03 -0.607613494988126020E-09 0.822717713025281072E-02 0.926588367987988062E-02 -0.622883813196616022E-03 -0.666666666666666970E-02 -0.671594845205971042E-03 0.921711546375830054E-02 -0.666666666666666970E-02 0.921711546375830054E-02 -0.671594845205971042E-03 -0.622883813196616022E-03 0.926588367987988062E-02 -0.666666666666666970E-02 -0.162080643229434011E-03 0.755719622314534028E-02 -0.488882760495459993E-11 -0.164695578667720994E-03 -0.719871721533777035E-09 0.822716411325388926E-02 -0.166686373905860005E-03 0.822673885725208956E-02 -0.487733280019655016E-07 
-0.166616817170293989E-03 -0.666666666666666970E-02 -0.385414488502229971E-07 0.822045888465093924E-02 -0.666666666666666970E-02 -0.434813234051737991E-03 0.920937424335109035E-02 -0.669894558723525000E-03 -0.435284874465938004E-03 -0.622580915854730019E-03 0.926510963371094017E-02 -0.666666666666666970E-02 -0.435040408248740026E-03 0.921688082460559062E-02 -0.671547530054927020E-03 -0.435057978759970999E-03 -0.622879363153527985E-03 0.926586503625058920E-02 -0.666666666666666970E-02 -0.435048354812829995E-03 0.921714230585230984E-02 -0.671602199049514052E-03 0.000000000000000000E+00 -0.435050032579175011E-03 -0.622886590993160022E-03 0.926588809980974024E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.162065782294302011E-03 0.755721459868404031E-02 -0.425813299026563987E-11 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164621303266333996E-03 0.000000000000000000E+00 -0.628265660483508001E-09 0.822715314376032923E-02 -0.162542375670815992E-03 -0.666666666666666970E-02 0.755649757850690006E-02 -0.563432271384059970E-09 -0.167001893968643994E-03 -0.773903559881119996E-07 0.822676727708255076E-02 -0.666666666666666970E-02 -0.162062879664528998E-03 0.755721695458686990E-02 -0.414938028726372967E-11 -0.164606984614506006E-03 -0.612470146931394021E-09 0.822717691631253030E-02 -0.435061921280182988E-03 0.926591210512174059E-02 -0.622933977614011981E-03 -0.435036465303411009E-03 -0.666666666666666970E-02 -0.671580853268202043E-03 0.921680734331616937E-02 -0.435036519090423993E-03 -0.666666666666666970E-02 0.921831642815977917E-02 -0.673202190294888052E-03 -0.435061867500153025E-03 -0.624494985629193999E-03 0.926748120028762952E-02 -0.435006921432357974E-03 -0.666666666666666970E-02 0.921581685576767021E-02 -0.671357037929466998E-03 -0.435091457008391985E-03 -0.622887259624104036E-03 0.926580572699929952E-02 -0.666666666666666970E-02 -0.435000210414426987E-03 0.921805741531904997E-02 -0.673980656048901966E-03 -0.435098164960824000E-03 
-0.625455737994957050E-03 0.926835240170618968E-02 -0.167002256657801997E-03 0.822676739801933045E-02 -0.775116142162273040E-07 -0.162540963549468999E-03 -0.666666666666666970E-02 -0.564105828717064959E-09 0.755607184337712002E-02 -0.162063151808471010E-03 -0.666666666666666970E-02 0.755610609873923027E-02 -0.425254382354864998E-11 -0.164619383929850006E-03 -0.628041057868921975E-09 0.822717599205483041E-02 -0.666666666666666970E-02 0.921687630329096064E-02 -0.671540851769114968E-03 -0.622872550846322991E-03 0.926585805654763991E-02 0.926585809314046070E-02 -0.622872566927465052E-03 -0.666666666666666970E-02 -0.671540928853904041E-03 0.921687664468394917E-02 -0.164358960620165987E-03 -0.666666666666666970E-02 0.822694395609338026E-02 -0.400847008848852017E-09 -0.164360413027222997E-03 -0.404398713049403977E-09 0.822717692924596067E-02 -0.666666666666666970E-02 -0.162064640983030012E-03 0.755718903530966032E-02 -0.421955570049609974E-11 -0.164616035944705001E-03 -0.622688578366344997E-09 0.822717711474709075E-02 -0.435031324368629995E-03 -0.666666666666666970E-02 0.921656922225446047E-02 -0.671471582389523958E-03 0.000000000000000000E+00 -0.435067061426646024E-03 -0.622858176290813008E-03 0.926582523607523932E-02 -0.666666666666666970E-02 -0.435031048387666001E-03 0.921594565739619967E-02 -0.671345384731177964E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435067337357646024E-03 0.000000000000000000E+00 -0.622804669702495955E-03 0.926513810679983028E-02 -0.666666666666666970E-02 -0.162062558200820996E-03 0.755721755342539988E-02 -0.413711391502012990E-11 -0.164605379088546999E-03 -0.610686125296504951E-09 0.822717713332750053E-02 0.926585808945160939E-02 -0.622872565296268030E-03 -0.666666666666666970E-02 -0.671540921029493000E-03 0.921687661003382959E-02 -0.666666666666666970E-02 -0.164358960452099987E-03 0.822694392878840081E-02 -0.400846613572565987E-09 -0.164360413029369988E-03 -0.404398732358932989E-09 
0.822717692925123943E-02 -0.666666666666666970E-02 0.921687630355970053E-02 -0.671540851829773044E-03 -0.622872550859014987E-03 0.926585805657732971E-02 -0.162062558227438999E-03 -0.666666666666666970E-02 0.755721760853244034E-02 -0.413711033678103961E-11 -0.164605378673800013E-03 -0.610685584929304044E-09 0.822717713318102915E-02 -0.666666666666666970E-02 -0.162064570152263006E-03 0.755721841422958996E-02 -0.421433625240463005E-11 -0.164615390632404009E-03 -0.621914612647070953E-09 0.822717706877345960E-02 -0.666666666666666970E-02 -0.435032309673597981E-03 0.921659272513397979E-02 -0.671468254207514999E-03 0.000000000000000000E+00 -0.435066076293283009E-03 -0.622849275891169040E-03 0.926581888559491022E-02 -0.666666666666666970E-02 -0.435031684645659992E-03 0.921601778227170050E-02 -0.671377596560439956E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066701213360013E-03 0.000000000000000000E+00 -0.622828682917879006E-03 0.926519565944300058E-02 -0.666666666666666970E-02 -0.162168008367528997E-03 0.755720834454587983E-02 -0.111631510700418996E-10 -0.165130579339017988E-03 -0.162383743141729992E-08 0.822716472424545042E-02 0.822711943021554003E-02 -0.222017845615760010E-09 -0.666666666666666970E-02 -0.777761013879667033E-11 0.819196920656370918E-02 -0.666666666666666970E-02 0.917235355722251927E-02 -0.661205828628974026E-03 -0.620672923579346962E-03 0.926095692540444999E-02 -0.666666666666666970E-02 -0.162060886928861006E-03 0.755722230561571041E-02 -0.407434722062596981E-11 -0.164603197375618002E-03 -0.601754536458248027E-09 0.822718170485291916E-02 -0.435038460514972026E-03 0.921717884367924054E-02 -0.671598994291641997E-03 -0.435059926303323985E-03 -0.666666666666666970E-02 -0.623144536207649959E-03 0.926616805245615051E-02 -0.666666666666666970E-02 -0.435059886891442024E-03 0.926772984879662039E-02 -0.624637266544077001E-03 -0.435038499931220991E-03 -0.673028756927541039E-03 0.921880428832917015E-02 
-0.666666666666666970E-02 -0.162064590679631006E-03 0.755721768483237979E-02 -0.421520958094493002E-11 -0.164615528433844000E-03 -0.622042930544206962E-09 0.822717716750567991E-02 -0.666666666666666970E-02 -0.435032046708114988E-03 0.921657978568275055E-02 -0.671461221312615953E-03 0.000000000000000000E+00 -0.435066339213917980E-03 -0.622843930781788978E-03 0.926581370321850027E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031628760092985E-03 0.921598912055172914E-02 -0.671353905916793046E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066757089098022E-03 0.000000000000000000E+00 -0.622806875303639953E-03 0.926516708273138027E-02 -0.435042591383769009E-03 -0.666666666666666970E-02 0.921694193442373043E-02 -0.671550566383684018E-03 -0.435055795793300019E-03 -0.622869755363486023E-03 0.926586026640358919E-02 -0.666666666666666970E-02 -0.162061536519942994E-03 0.755722454624959024E-02 -0.409902564328030991E-11 -0.164610432002776987E-03 -0.605475143914787990E-09 0.822718468701886044E-02 -0.435048024953900003E-03 0.921879226944885034E-02 -0.673030814417662025E-03 -0.435050362434787020E-03 -0.666666666666666970E-02 -0.624403907899169952E-03 0.926747756344729022E-02 -0.164429321124159010E-03 -0.666666666666666970E-02 0.822722510694912958E-02 -0.405099938099922984E-09 -0.164359494664249005E-03 -0.403602349458583981E-09 0.822717730152964025E-02 -0.435057319269020976E-03 -0.666666666666666970E-02 0.926766299189552001E-02 -0.624575498879247960E-03 -0.435041067795715979E-03 -0.673029869981998019E-03 0.921880276298027955E-02 -0.666666666666666970E-02 -0.162062471480618010E-03 0.755721793381790036E-02 -0.413381786270217965E-11 -0.164605232829300992E-03 -0.610216091166618047E-09 0.822717734939633002E-02 -0.435056876315470997E-03 0.926585635253173064E-02 -0.622868044974361963E-03 -0.435041510784235001E-03 -0.666666666666666970E-02 -0.671542372337881029E-03 0.921690563981085974E-02 -0.164351356240546013E-03 -0.666666666666666970E-02 
0.822698086373702987E-02 -0.395512544131405997E-09 -0.164365831071158996E-03 -0.398742006637490978E-09 0.822718605874551978E-02 -0.666666666666666970E-02 0.920452787846717975E-02 -0.668731214842509985E-03 -0.622283241890150981E-03 0.926452697312771072E-02 0.822724025612022077E-02 -0.586193181292780024E-09 -0.666666666666666970E-02 -0.399370802774403963E-11 0.756944692768340022E-02 -0.162114939474388000E-03 -0.666666666666666970E-02 0.756941304335114005E-02 -0.521957327686064991E-11 -0.164756390953382006E-03 -0.760317714252139982E-09 0.822719049518821043E-02 -0.666666666666666970E-02 -0.162348325334986989E-03 0.757141350179527019E-02 -0.154261696442177995E-04 -0.530720487223206049E-03 -0.221684447280562997E-03 0.891605622167588978E-02 -0.163035610806201990E-03 -0.666666666666666970E-02 0.791126088236420930E-02 -0.568886536946596036E-05 0.000000000000000000E+00 -0.530385316755896006E-03 -0.644747260841582991E-04 0.878526676544224942E-02 -0.666666666666666970E-02 -0.399601245888043025E-03 0.843932649296406934E-02 -0.399753779534292998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.464883727737099017E-03 0.000000000000000000E+00 -0.476904229282087023E-03 0.874547695183781025E-02 -0.666666666666666970E-02 -0.162063070557396998E-03 0.755351614246298989E-02 -0.447630170821152977E-11 -0.164644944992450995E-03 -0.661962042461270997E-09 0.822717544552603076E-02 0.926551780075880044E-02 -0.622722840857442951E-03 -0.666666666666666970E-02 -0.670824267990035965E-03 0.921370888772457021E-02 -0.666666666666666970E-02 -0.434945178474266999E-03 0.921154593499040955E-02 -0.668579923648182010E-03 -0.435153154698610990E-03 -0.620572207908768009E-03 0.926326406979680050E-02 -0.666666666666666970E-02 0.755816621971812028E-02 -0.411326358485724017E-11 -0.610348502915667010E-09 0.822725905587247917E-02 -0.402874267847195980E-03 -0.666666666666666970E-02 0.865106149194994067E-02 -0.472609392802310021E-03 -0.462635021140100986E-03 
-0.557606090580912007E-03 0.917391269482082002E-02 -0.666666666666666970E-02 -0.162519833129987003E-03 0.758488053948729989E-02 -0.277295971224945994E-04 -0.531411087991590981E-03 -0.271305613723866980E-03 0.895289446740019958E-02 -0.666666666666666970E-02 -0.435144823516233985E-03 0.927048838466438055E-02 -0.627189812839501980E-03 0.000000000000000000E+00 -0.434953518101383974E-03 -0.673638210139792994E-03 0.921926445312559045E-02 -0.666666666666666970E-02 -0.162365832936652005E-03 0.770469512965636042E-02 -0.341376585662229018E-05 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.530094362034539009E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.670698858845583960E-04 0.878401949557726033E-02 -0.666666666666666970E-02 -0.406469382005018019E-03 0.868845408484301973E-02 -0.487445981816212999E-03 -0.459955463954967011E-03 -0.561455560363951947E-03 0.917876824764385021E-02 0.822724019224600929E-02 -0.586235995361394981E-09 -0.666666666666666970E-02 -0.399393156536155037E-11 0.756942275977688043E-02 -0.666666666666666970E-02 0.756938783851883977E-02 -0.398490198193408992E-11 -0.582276435921294038E-09 0.822717710481313952E-02 -0.666666666666666970E-02 -0.162061804589790998E-03 0.755638842160883967E-02 -0.417671676259941005E-11 -0.164610667790356010E-03 -0.616892111647492956E-09 0.822717774708076922E-02 -0.435068239353668981E-03 0.926581925277365943E-02 -0.622854391016698997E-03 -0.435030146223642990E-03 -0.666666666666666970E-02 -0.671460680594175010E-03 0.921652801901830070E-02 -0.666666666666666970E-02 -0.164225650703184010E-03 0.822670097400921008E-02 -0.310328437504517979E-09 -0.164276003216507990E-03 -0.316769467002113981E-09 0.822720900471031989E-02 -0.666666666666666970E-02 -0.435040486303126981E-03 0.921687614266927975E-02 -0.671540506442383023E-03 -0.435057900712426009E-03 -0.622872183625812047E-03 0.926585769785132074E-02 -0.666666666666666970E-02 -0.162127332716032000E-03 0.759121301448910032E-02 
-0.381743951397943036E-11 0.000000000000000000E+00 -0.164582975768345007E-03 -0.545926699323485040E-09 0.822717722734729075E-02 -0.666666666666666970E-02 -0.166319286578935988E-03 0.755721781846689991E-02 -0.125531050075483999E-10 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854284080451002E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.695180386014204010E-09 0.759121369168153960E-02 -0.435042566545895985E-03 -0.666666666666666970E-02 0.921694463459228966E-02 -0.671554769008383002E-03 -0.435055820629525977E-03 -0.622874016968664036E-03 0.926586380297488021E-02 -0.666666666666666970E-02 -0.435042888369998996E-03 0.921681962552989981E-02 -0.671408260658942963E-03 -0.435055498826277974E-03 -0.622730755410115045E-03 0.926572389917782943E-02 -0.164512123628282998E-03 0.822720916387005043E-02 -0.477817077089683953E-09 -0.162035062407616011E-03 -0.666666666666666970E-02 -0.321765105888902011E-11 0.755728112204505018E-02 -0.162062504982868002E-03 -0.666666666666666970E-02 0.755727880376912999E-02 -0.413009842789627016E-11 -0.164604675325971005E-03 -0.609640565546088004E-09 0.822717725918549990E-02 -0.162035022530451994E-03 -0.666666666666666970E-02 0.755725864235686981E-02 -0.321784768823900019E-11 -0.164512138073065988E-03 -0.477854634654902035E-09 0.822720916390744933E-02 -0.666666666666666970E-02 -0.162061944628342011E-03 0.755721772719453004E-02 -0.411382630968498961E-11 -0.164602365207998006E-03 -0.607300598025874993E-09 0.822717716454718015E-02 -0.435050067144334015E-03 0.926588448108721027E-02 -0.622883111379359043E-03 -0.435048320247372999E-03 -0.666666666666666970E-02 -0.671598373882453998E-03 0.921713778918750032E-02 -0.435048320032475997E-03 -0.666666666666666970E-02 0.921705200694796986E-02 -0.671504927255521023E-03 -0.435050067359229987E-03 -0.622795092030723960E-03 0.926579461935501947E-02 -0.666666666666666970E-02 0.755741246850972007E-02 -0.410910644191072979E-11 -0.606515170660810997E-09 
0.822717712895315936E-02 0.822711877443554028E-02 -0.222069534616361010E-09 -0.666666666666666970E-02 -0.774169825927849969E-11 0.819168186428156052E-02 -0.433633789807636014E-03 -0.666666666666666970E-02 0.917151071499614987E-02 -0.660635211889124972E-03 -0.436454594099722020E-03 -0.620192760036731021E-03 0.926043761627064026E-02 -0.666666666666666970E-02 -0.435401639669350015E-03 0.924572083824642954E-02 -0.602697496588707024E-03 -0.434696125740512986E-03 -0.641341546899128006E-03 0.919049825465466028E-02 -0.417766807487059990E-03 -0.666666666666666970E-02 0.884119030057015955E-02 -0.565196723638367016E-03 0.000000000000000000E+00 -0.450926776155331018E-03 -0.597865637200630960E-03 0.921780465508625045E-02 -0.666666666666666970E-02 -0.162927680984810988E-03 0.761083385223151974E-02 -0.416350559257179991E-04 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.530230645905115008E-03 0.000000000000000000E+00 -0.269827250117159992E-03 0.866009133064084974E-02 -0.666666666666666970E-02 -0.162144345329803000E-03 0.760092333284733031E-02 -0.320930747823896000E-05 -0.529813749349416032E-03 -0.838256764278110937E-04 0.879776791477051949E-02 0.822717682684740922E-02 -0.303187284199640013E-09 -0.666666666666666970E-02 -0.349301427958089990E-11 0.790044609725218047E-02 -0.666666666666666970E-02 -0.407938480851945009E-03 0.871084859569853931E-02 -0.512295472619773030E-03 -0.458837926585046020E-03 -0.588085060897069957E-03 0.920289209739394085E-02 -0.666666666666666970E-02 0.755933306799180978E-02 -0.410021688445045033E-11 -0.607789044544431015E-09 0.822725774217296021E-02 -0.162167689058370998E-03 -0.666666666666666970E-02 0.755666327735462964E-02 -0.112741278002712006E-10 -0.165135297873814004E-03 -0.164049626541977997E-08 0.822716577112805013E-02 -0.666666666666666970E-02 -0.162508948582128004E-03 0.758386230747792964E-02 -0.268846850272863999E-04 -0.531370159696699959E-03 -0.268798452226511010E-03 0.895121067622118953E-02 
-0.666666666666666970E-02 -0.435111783295980990E-03 0.926957338892927044E-02 -0.626355975132891958E-03 0.000000000000000000E+00 -0.434986584483092024E-03 -0.673610961683688029E-03 0.921919033051740028E-02 -0.666666666666666970E-02 -0.162383112105014989E-03 0.770842507081398020E-02 -0.352838184825081003E-05 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.529946733926804961E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.685200481248618984E-04 0.878522933538209046E-02 -0.666666666666666970E-02 -0.406001492103280994E-03 0.868371047881797038E-02 -0.486532578576920989E-03 -0.460309450350815017E-03 -0.562408172008129988E-03 0.917930422133162068E-02 0.822717710596379027E-02 -0.583431679178089002E-09 -0.666666666666666970E-02 -0.399067558641347968E-11 0.756879025163890995E-02 -0.666666666666666970E-02 0.756882506046350958E-02 -0.399965820178691986E-11 -0.587373439708311021E-09 0.822723979745249023E-02 -0.666666666666666970E-02 -0.162059699334328987E-03 0.755640315367181025E-02 -0.409724983315023040E-11 -0.164617694447567010E-03 -0.605894425669846960E-09 0.822719082319614027E-02 -0.435046673499388009E-03 0.921780363530167943E-02 -0.671765264155256970E-03 -0.435051713864318020E-03 -0.666666666666666970E-02 -0.623330396350863002E-03 0.926636396497984918E-02 -0.666666666666666970E-02 -0.164528465340170010E-03 0.822727665463759972E-02 -0.426431688727269020E-09 -0.164384495696545988E-03 -0.423252858884490006E-09 0.822718001752816008E-02 -0.666666666666666970E-02 -0.435040505956391999E-03 0.921687628114545993E-02 -0.671540025560727998E-03 -0.435057881060873976E-03 -0.622871599674077966E-03 0.926585723223368081E-02 -0.666666666666666970E-02 -0.162127506812547009E-03 0.759121355987753960E-02 -0.382292666777456993E-11 0.000000000000000000E+00 -0.164583701911017989E-03 -0.546697495368834967E-09 0.822717719106172071E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319165150470994E-03 0.755721813537886013E-02 -0.125075381051564997E-10 
-0.666666666666666970E-02 0.000000000000000000E+00 -0.166854154167308990E-03 0.000000000000000000E+00 -0.692662263886114992E-09 0.759121431463458986E-02 -0.435081853759711993E-03 -0.666666666666666970E-02 0.926716255638974050E-02 -0.624065278105386955E-03 -0.435016528292601001E-03 -0.671759479502790952E-03 0.921784374777362935E-02 -0.666666666666666970E-02 -0.435040907342144000E-03 0.921689077912838002E-02 -0.671540798041408006E-03 -0.435057479709268001E-03 -0.622869631331658029E-03 0.926586003647436943E-02 -0.164634692602426999E-03 0.822718023129201045E-02 -0.639569061694972014E-09 -0.162067618394035989E-03 -0.666666666666666970E-02 -0.433481102637178975E-11 0.755721907361002043E-02 -0.162058744590199013E-03 -0.666666666666666970E-02 0.755722007835428998E-02 -0.399695343689710995E-11 -0.164609059967339006E-03 -0.591014713275666970E-09 0.822719388300061083E-02 -0.162067628996523003E-03 -0.666666666666666970E-02 0.755722473679793037E-02 -0.433474416210013021E-11 -0.164634688823378012E-03 -0.639556359277955005E-09 0.822718023127935044E-02 -0.666666666666666970E-02 -0.162059007940064013E-03 0.755722761558702029E-02 -0.400519574062733029E-11 -0.164602739721675997E-03 -0.591972127525160032E-09 0.822718830313987938E-02 -0.435017256528589982E-03 0.921784067930350032E-02 -0.671758935795494969E-03 -0.435081125759231022E-03 -0.666666666666666970E-02 -0.624046237027197953E-03 0.926714180760297018E-02 -0.435081137126827990E-03 -0.666666666666666970E-02 0.926672070407219030E-02 -0.623654480728591011E-03 -0.435017245156463000E-03 -0.671584693353717023E-03 0.921719987714082929E-02 -0.666666666666666970E-02 0.755944258238581013E-02 -0.409900186657922026E-11 -0.607550069673094021E-09 0.822725761878752987E-02 0.822717683107230917E-02 -0.307429328465473987E-09 -0.666666666666666970E-02 -0.346590495801865012E-11 0.788944074290687074E-02 -0.405969478945487005E-03 -0.666666666666666970E-02 0.868864830866255926E-02 -0.503334330728072020E-03 -0.460337024732346983E-03 
-0.585683665883423958E-03 0.919951404153332084E-02 -0.666666666666666970E-02 -0.434976282613521999E-03 0.922412114032335022E-02 -0.680891393674205956E-03 -0.435122078171959979E-03 -0.632218355554151046E-03 0.927546870226302056E-02 -0.163258162677796989E-03 -0.666666666666666970E-02 0.771427722386641007E-02 -0.477633227286587011E-04 0.000000000000000000E+00 -0.532374011844484007E-03 -0.264816258060028011E-03 0.893589130313274076E-02 -0.666666666666666970E-02 -0.163022377118752999E-03 0.756010678634908007E-02 -0.985505930025391935E-05 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.528465266297688968E-03 0.000000000000000000E+00 -0.112888567375700995E-03 0.818271007019684068E-02 0.000000000000000000E+00 -0.666666666666666970E-02 -0.162166578290559013E-03 0.755933922022227964E-02 -0.104753666208369006E-10 -0.165099058432877988E-03 -0.152245985024684009E-08 0.822717141733115068E-02 0.822711899107518921E-02 -0.221645138348997998E-09 -0.666666666666666970E-02 -0.803318982792332002E-11 0.819386807586616923E-02 -0.666666666666666970E-02 -0.433725919557807001E-03 0.917430862025363052E-02 -0.661288984210802052E-03 -0.436363721932067020E-03 -0.620322135837134050E-03 0.926073953967261956E-02 -0.666666666666666970E-02 0.755741928801698017E-02 -0.410903028639137974E-11 -0.606500685137881987E-09 0.822717712893872993E-02 -0.162225051674325002E-03 -0.666666666666666970E-02 0.762801683545900975E-02 -0.369132323199304980E-05 -0.529778141458443004E-03 -0.883337008316910978E-04 0.880271404367008979E-02 -0.666666666666666970E-02 -0.435040483790848998E-03 0.921687610102163955E-02 -0.671540507590834041E-03 -0.435057903224484984E-03 -0.622872195907222962E-03 0.926585773544527994E-02 -0.666666666666666970E-02 -0.435049596668294974E-03 0.926588177948937074E-02 -0.622878658890590954E-03 0.000000000000000000E+00 -0.435048790726422978E-03 -0.671587306492918033E-03 0.921715304894545062E-02 -0.666666666666666970E-02 -0.162062557266168006E-03 0.755721780085659000E-02 
-0.413875860912190023E-11 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164620130721840991E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.611410160549998955E-09 0.822718806718837972E-02 -0.666666666666666970E-02 -0.435040499350135008E-03 0.921687692625696060E-02 -0.671540975224692001E-03 -0.435057887666556015E-03 -0.622872560192916044E-03 0.926585810471101945E-02 0.822717712936562977E-02 -0.606929315017626020E-09 -0.666666666666666970E-02 -0.411128311855796991E-11 0.755721765969236993E-02 -0.666666666666666970E-02 0.755721765969236993E-02 -0.411128306434785967E-11 -0.606929314990520966E-09 0.822717712936562977E-02 -0.666666666666666970E-02 -0.162059344493046002E-03 0.755722651317146995E-02 -0.401759691005694024E-11 -0.164603667382743994E-03 -0.593754382165260968E-09 0.822718774646260045E-02 -0.435021349583778003E-03 0.921808799379093997E-02 -0.672054642719300036E-03 -0.435077033929117025E-03 -0.666666666666666970E-02 -0.624215698660223004E-03 0.926731300229175978E-02 -0.666666666666666970E-02 -0.435077064835482003E-03 0.926661685822192938E-02 -0.623558994505915955E-03 -0.435021318668023997E-03 -0.671590322333262958E-03 0.921719752068604972E-02 -0.666666666666666970E-02 -0.162064557104371002E-03 0.755721777860633021E-02 -0.421391246707965999E-11 -0.164615474626227001E-03 -0.621858123915516982E-09 0.822717725212621966E-02 -0.666666666666666970E-02 -0.435032241858439002E-03 0.921659906098254954E-02 -0.671477447178317022E-03 0.000000000000000000E+00 -0.435066144096944024E-03 -0.622858570800819051E-03 0.926582756926139055E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031827990191983E-03 0.921598377863167006E-02 -0.671334364038543986E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066557893876995E-03 0.000000000000000000E+00 -0.622785914397349987E-03 0.926515598810159942E-02 -0.435075666026484009E-03 -0.666666666666666970E-02 0.926727820268466972E-02 -0.624183768120280973E-03 -0.435022717858528992E-03 
-0.672055614043404951E-03 0.921808835078249955E-02 -0.666666666666666970E-02 -0.162062424733906989E-03 0.755721807683177975E-02 -0.413205652975882023E-11 -0.164605235421495002E-03 -0.609967620239067977E-09 0.822717752452148052E-02 -0.435056352038405984E-03 0.926585352741150087E-02 -0.622863300988667017E-03 -0.435042035100298007E-03 -0.666666666666666970E-02 -0.671540654928718013E-03 0.921691864794477005E-02 -0.164343513344571992E-03 -0.666666666666666970E-02 0.822700195448362002E-02 -0.389924360631551020E-09 -0.164367623390296991E-03 -0.392998923185469007E-09 0.822719276781523970E-02 -0.435043552264714024E-03 -0.666666666666666970E-02 0.921696961930900012E-02 -0.671552161840081006E-03 -0.435054834971315978E-03 -0.622865703361334027E-03 0.926585903533608951E-02 -0.666666666666666970E-02 -0.162060264459924991E-03 0.755722786475403977E-02 -0.405192380789587974E-11 -0.164611230243440009E-03 -0.598848077831665017E-09 0.822718997281340035E-02 -0.435034019503558974E-03 0.921807335886390064E-02 -0.672057800833721053E-03 -0.435064366738565982E-03 -0.666666666666666970E-02 -0.623908223929953956E-03 0.926697860106714955E-02 -0.164987547210470009E-03 -0.666666666666666970E-02 0.822761164777149022E-02 -0.417375828718230023E-09 -0.164359000422478990E-03 -0.403612204340233986E-09 0.822717763383406034E-02 -0.666666666666666970E-02 0.755721765245576018E-02 -0.411128322661895974E-11 -0.606929330387248952E-09 0.822717712936565058E-02 0.822717692676105082E-02 -0.401916206499038982E-09 -0.666666666666666970E-02 -0.398504312842813992E-09 0.822695178414330035E-02 -0.435040783009246019E-03 -0.666666666666666970E-02 0.921688646246055995E-02 -0.671543142540194956E-03 -0.435057604031761002E-03 -0.622873025744601029E-03 0.926585914161285988E-02 -0.666666666666666970E-02 -0.162064588603687992E-03 0.755722246650228965E-02 -0.421473369469323961E-11 -0.164615514901262993E-03 -0.621972777019227005E-09 0.822717720033716050E-02 -0.435031817237567986E-03 -0.666666666666666970E-02 
0.921658561270818043E-02 -0.671475161541192035E-03 0.000000000000000000E+00 -0.435066568644765022E-03 -0.622858805909676998E-03 0.926582686247801046E-02 -0.666666666666666970E-02 -0.435031985152347994E-03 0.921599362553296962E-02 -0.671355526523753957E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066400758952999E-03 0.000000000000000000E+00 -0.622807256895068981E-03 0.926516002397628075E-02 -0.666666666666666970E-02 -0.162062535395593997E-03 0.755721770745760007E-02 -0.413623360119686986E-11 -0.164605263979841011E-03 -0.610558026872576020E-09 0.822717713332134053E-02 0.822717692676101960E-02 -0.401916174204150005E-09 -0.666666666666666970E-02 -0.398504881043303978E-09 0.822695182391632029E-02 -0.666666666666666970E-02 -0.435040784478542004E-03 0.921688651148403967E-02 -0.671543153316058990E-03 -0.435057602562588996E-03 -0.622873027716196984E-03 0.926585914660035988E-02 -0.666666666666666970E-02 0.755721765260307984E-02 -0.411128317076067032E-11 -0.606929330073791030E-09 0.822717712936565058E-02 -0.162062535451855009E-03 -0.666666666666666970E-02 0.755721767183212961E-02 -0.413623865254959015E-11 -0.164605264611767001E-03 -0.610558775336228996E-09 0.822717713310498061E-02 -0.666666666666666970E-02 -0.435041508530557993E-03 0.921691025951686983E-02 -0.671547950168372028E-03 -0.435056878568975021E-03 -0.622873496063732976E-03 0.926586118314657974E-02 -0.666666666666666970E-02 -0.435049193102497007E-03 0.921716879431322932E-02 -0.671606868811568960E-03 0.000000000000000000E+00 -0.435049194293033988E-03 -0.622886307249136953E-03 0.926588937729599070E-02 -0.666666666666666970E-02 -0.162062558223755992E-03 0.755721793600594959E-02 -0.413708123041369019E-11 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605375314667000E-03 0.000000000000000000E+00 -0.610681172826885034E-09 0.822717711923300066E-02 -0.666666666666666970E-02 -0.435040497369072016E-03 0.921687685990914968E-02 -0.671540960664754978E-03 
-0.435057889647446998E-03 -0.622872557534397998E-03 0.926585809770509930E-02 0.822717712936562977E-02 -0.606929315458585975E-09 -0.666666666666666970E-02 -0.411128312059151024E-11 0.755721765951058999E-02 -0.666666666666666970E-02 0.755721765951058999E-02 -0.411128314769655971E-11 -0.606929315377271019E-09 0.822717712936562977E-02 -0.666666666666666970E-02 -0.162062535431107012E-03 0.755721765815363031E-02 -0.413623888851339997E-11 -0.164605264643440992E-03 -0.610558828302725956E-09 0.822717713301761994E-02 -0.164360410638444992E-03 0.822717692977796913E-02 -0.404391259451850998E-09 -0.164359006270221988E-03 -0.666666666666666970E-02 -0.400958477955193996E-09 0.822695178639428967E-02 -0.666666666666666970E-02 -0.435040782988183007E-03 0.921688646176349081E-02 -0.671543142412070991E-03 -0.435057604052822009E-03 -0.622873025744216029E-03 0.926585914154554914E-02 -0.666666666666666970E-02 -0.435040480198357025E-03 0.921687629980787008E-02 -0.671540848012874966E-03 -0.435057906816663025E-03 -0.622872547218613964E-03 0.926585805277396063E-02 -0.666666666666666970E-02 -0.435049192310166017E-03 0.921716878050548964E-02 -0.671606876994400976E-03 0.000000000000000000E+00 -0.435049195085364978E-03 -0.622886319709903963E-03 0.926588938772177073E-02 -0.666666666666666970E-02 -0.162062559106551998E-03 0.755721765241205989E-02 -0.413713446085067003E-11 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605382365712999E-03 0.000000000000000000E+00 -0.610689035956021986E-09 0.822717709595601962E-02 -0.162062558807452007E-03 -0.666666666666666970E-02 0.755721765294025023E-02 -0.413712892374228960E-11 -0.164605381125477004E-03 -0.610688254193982979E-09 0.822717713365142024E-02 -0.666666666666666970E-02 -0.162062001820446010E-03 0.755721765238299981E-02 -0.411599096858730035E-11 -0.164602607028427003E-03 -0.607614049978665960E-09 0.822717713004911949E-02 -0.435050784134680991E-03 0.926588367152923927E-02 -0.622883807494963005E-03 
-0.435047603248173024E-03 -0.666666666666666970E-02 -0.671594832025931951E-03 0.921711541901995036E-02 -0.435047603248167982E-03 -0.666666666666666970E-02 0.921711541808560922E-02 -0.671594830956834992E-03 -0.435050784134685979E-03 -0.622883806457666020E-03 0.926588367056311972E-02 -0.435040480225729010E-03 -0.666666666666666970E-02 0.921687630059584047E-02 -0.671540848078642995E-03 -0.435057906789294022E-03 -0.622872547125350026E-03 0.926585805273653050E-02 -0.666666666666666970E-02 -0.435040480222318978E-03 0.921687628821601067E-02 -0.671540835117597997E-03 -0.435057906792703025E-03 -0.622872534696777965E-03 0.926585803993784998E-02 -0.164605381122261992E-03 0.822717713365140983E-02 -0.610688243907033049E-09 -0.162062558816420988E-03 -0.666666666666666970E-02 -0.413712878804995986E-11 0.755721765777050969E-02 -0.162062559114418996E-03 -0.666666666666666970E-02 0.755721765773097014E-02 -0.413714007422677992E-11 -0.164605382603210006E-03 -0.610689880891928972E-09 0.822717713311491017E-02 -0.666666666666666970E-02 0.755721765237515972E-02 -0.411128320041550968E-11 -0.606929330558722976E-09 0.822717712936565058E-02 0.822717692676106990E-02 -0.401916224778808988E-09 -0.666666666666666970E-02 -0.398503993603989025E-09 0.822695176179714938E-02 -0.435040782183969974E-03 -0.666666666666666970E-02 0.921688643492011954E-02 -0.671543136482110042E-03 -0.435057604856967008E-03 -0.622873024631880002E-03 0.926585913880642077E-02 -0.666666666666666970E-02 -0.162064603069876005E-03 0.755721765255115002E-02 -0.421568943808864994E-11 -0.164615562666949012E-03 -0.622111784951490950E-09 0.822717714448729931E-02 -0.435031767249488988E-03 -0.666666666666666970E-02 0.921658397566323985E-02 -0.671474830577318974E-03 0.000000000000000000E+00 -0.435066618624126004E-03 -0.622858773502819960E-03 0.926582672350118042E-02 -0.666666666666666970E-02 -0.435031759838531019E-03 0.921598447237031992E-02 -0.671353608087835044E-03 0.000000000000000000E+00 0.000000000000000000E+00 
-0.666666666666666970E-02 -0.435066626033650983E-03 0.000000000000000000E+00 -0.622806883370278998E-03 0.926515741219141944E-02 -0.666666666666666970E-02 -0.162062535484635997E-03 0.755721765240888969E-02 -0.413624141678407034E-11 -0.164605264966746989E-03 -0.610559194915996025E-09 0.822717713298093921E-02 0.822717692676106990E-02 -0.401916224778717016E-09 -0.666666666666666970E-02 -0.398503993605691001E-09 0.822695176179727081E-02 -0.666666666666666970E-02 -0.435040782183975015E-03 0.921688643492027046E-02 -0.671543136482143001E-03 -0.435057604856962997E-03 -0.622873024631885965E-03 0.926585913880642945E-02 -0.666666666666666970E-02 0.755721765237515972E-02 -0.411128320041550968E-11 -0.606929330558722046E-09 0.822717712936565058E-02 -0.162062535484635997E-03 -0.666666666666666970E-02 0.755721765240875958E-02 -0.413624138969446992E-11 -0.164605264966748995E-03 -0.610559194918309029E-09 0.822717713298093921E-02 -0.666666666666666970E-02 -0.162064573740720012E-03 0.755721745014789007E-02 -0.421455659253393022E-11 -0.164615418100806996E-03 -0.621947116183705050E-09 0.822717706997160014E-02 -0.666666666666666970E-02 -0.435032332380832999E-03 0.921659235812519011E-02 -0.671466933340958026E-03 0.000000000000000000E+00 -0.435066053589888019E-03 -0.622847839674434958E-03 0.926581781614685060E-02 -0.666666666666666970E-02 -0.435031592540386985E-03 0.921601550080636961E-02 -0.671377077483803031E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066793302425986E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.622828624049149020E-03 0.926519622533869026E-02 -0.666666666666666970E-02 -0.162168008363252009E-03 0.755720833616017017E-02 -0.111631527935513995E-10 -0.165130579415455000E-03 -0.162383770179478996E-08 0.822716472435720998E-02 0.822711943027826069E-02 -0.222017847415367999E-09 -0.666666666666666970E-02 -0.777760943851021986E-11 0.819196920090728073E-02 -0.666666666666666970E-02 0.917235354987984940E-02 -0.661205826876668044E-03 
-0.620672923200048048E-03 0.926095692459277982E-02 -0.666666666666666970E-02 -0.162061842522603000E-03 0.755722298561666960E-02 -0.411041893471449994E-11 -0.164609505054689987E-03 -0.607053573850319957E-09 0.822718286444302019E-02 -0.164360468540171998E-03 0.822717730472606081E-02 -0.403903416083619976E-09 -0.164359675113830009E-03 -0.666666666666666970E-02 -0.403282765837450980E-09 0.822713708288526011E-02 -0.666666666666666970E-02 -0.435047234874819013E-03 0.921870444385484988E-02 -0.672996776069070043E-03 -0.435051152501487017E-03 -0.624363760097220040E-03 0.926743532429376987E-02 -0.666666666666666970E-02 -0.435040488105227008E-03 0.921687626274407014E-02 -0.671540573261780044E-03 -0.435057898910482974E-03 -0.622872236309804958E-03 0.926585776729671982E-02 -0.666666666666666970E-02 -0.435049476964798998E-03 0.926588407933098997E-02 -0.622880979701605024E-03 0.000000000000000000E+00 -0.435048910430331005E-03 -0.671593403370661043E-03 0.921715753812691989E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.162062536856723012E-03 0.755721779084045971E-02 -0.413747795884770991E-11 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164615649658616002E-03 0.000000000000000000E+00 -0.611079896253776960E-09 0.822718482294398055E-02 -0.162062474956520004E-03 -0.666666666666666970E-02 0.755722267180921042E-02 -0.413358698004224000E-11 -0.164605418170935013E-03 -0.610187187425962033E-09 0.822717750858028077E-02 -0.666666666666666970E-02 -0.162060451582619013E-03 0.755722364032399008E-02 -0.405820744876872962E-11 -0.164603101435012986E-03 -0.599472333441493042E-09 0.822718324339584070E-02 -0.435033610082050993E-03 0.921880734854766022E-02 -0.673021763224571968E-03 -0.435064776097052002E-03 -0.666666666666666970E-02 -0.624752427388953016E-03 0.926785482617738070E-02 -0.435064833463209995E-03 -0.666666666666666970E-02 0.926629223154662972E-02 -0.623259249505167968E-03 -0.435033552706628004E-03 -0.671592121883009046E-03 0.921718022533242996E-02 
-0.435057725512486980E-03 -0.666666666666666970E-02 0.926766886039601979E-02 -0.624580707385012017E-03 -0.435040661518347027E-03 -0.673023701802679037E-03 0.921879973034194986E-02 -0.666666666666666970E-02 -0.435040745621057988E-03 0.921687745366213916E-02 -0.671533468502133989E-03 -0.435057641416789994E-03 -0.622863794394841984E-03 0.926585102903257005E-02 -0.164605418982737001E-03 0.822717750858287938E-02 -0.610189797184088988E-09 -0.162062472691372001E-03 -0.666666666666666970E-02 -0.413360072924052010E-11 0.755722145179233036E-02 -0.162060895421523010E-03 -0.666666666666666970E-02 0.755722209603739998E-02 -0.407539453490869988E-11 -0.164609527091146007E-03 -0.602110183541000016E-09 0.822718635496286037E-02 -0.666666666666666970E-02 0.920452788146493978E-02 -0.668731215529775008E-03 -0.622283242035163997E-03 0.926452697345470956E-02 0.822724025612128936E-02 -0.586193175774719989E-09 -0.666666666666666970E-02 -0.399370800068332996E-11 0.756944693050668957E-02 -0.162114939472544992E-03 -0.666666666666666970E-02 0.756941304617413969E-02 -0.521957294732459965E-11 -0.164756390920850005E-03 -0.760317657384023048E-09 0.822719049519187069E-02 -0.666666666666666970E-02 -0.162348328933429009E-03 0.757141579588467995E-02 -0.154258694392289985E-04 -0.530720471615547980E-03 -0.221681707782894001E-03 0.891605396648051038E-02 -0.163035609389389007E-03 -0.666666666666666970E-02 0.791126060525454075E-02 -0.568885700258806994E-05 0.000000000000000000E+00 -0.530385314277834956E-03 -0.644746756760560056E-04 0.878526670786249943E-02 -0.666666666666666970E-02 -0.399601863075062019E-03 0.843933027528507955E-02 -0.399755487421508981E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.464883291727197006E-03 0.000000000000000000E+00 -0.476904595508027981E-03 0.874547668628808915E-02 -0.666666666666666970E-02 -0.162063070476976004E-03 0.755351620828405030E-02 -0.447629245041502000E-11 -0.164644943926219012E-03 -0.661960659410746001E-09 
0.822717544592907989E-02 0.926551780966223919E-02 -0.622722844759921010E-03 -0.666666666666666970E-02 -0.670824286631610035E-03 0.921370896996778939E-02 -0.666666666666666970E-02 -0.434945180954293003E-03 0.921154601677883975E-02 -0.668579941724334022E-03 -0.435153152221171024E-03 -0.620572211261187045E-03 0.926326407816084954E-02 -0.666666666666666970E-02 0.755816621995536019E-02 -0.411326355512401980E-11 -0.610348502411631949E-09 0.822725905587248958E-02 -0.402874286571233979E-03 -0.666666666666666970E-02 0.865106167785427947E-02 -0.472609462967472005E-03 -0.462635007412455013E-03 -0.557606102878732972E-03 0.917391271174717964E-02 -0.666666666666666970E-02 -0.415928551473747994E-03 0.874525952715160017E-02 -0.345831822895715981E-03 -0.452437222882241992E-03 -0.346464452179155984E-03 0.898436212884005936E-02 -0.666666666666666970E-02 -0.410333854430279001E-03 0.874024567848459034E-02 -0.526325481552128048E-03 0.000000000000000000E+00 -0.456977123849370008E-03 -0.594710599463255975E-03 0.921019356669105019E-02 -0.666666666666666970E-02 -0.349138128782108001E-03 0.808745191949900022E-02 -0.174389554292269988E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.493489859954412968E-03 0.000000000000000000E+00 -0.295632059850581025E-03 0.859314138531184005E-02 -0.666666666666666970E-02 -0.162165456183634003E-03 0.755894482058337962E-02 -0.104756736090630003E-10 -0.165141512110283999E-03 -0.152686347573174001E-08 0.822721248554279978E-02 0.925936813557192914E-02 -0.620622041734691043E-03 -0.666666666666666970E-02 -0.661330289554338960E-03 0.917272027027182975E-02 -0.666666666666666970E-02 0.917272027027182975E-02 -0.661330289554338960E-03 -0.620622041734691043E-03 0.925936813557192914E-02 -0.666666666666666970E-02 -0.162062426567090008E-03 0.755772612433826957E-02 -0.409070240552687966E-11 -0.164599684884186013E-03 -0.603681595043669966E-09 0.822717664154948985E-02 -0.435043942803598025E-03 0.921717692320900978E-02 -0.671606591937570947E-03 
-0.435054444453763980E-03 -0.666666666666666970E-02 -0.623016030658318006E-03 0.926602914632313965E-02 -0.666666666666666970E-02 -0.435054444436950010E-03 0.926603032447032016E-02 -0.623017161092738014E-03 -0.435043942820411995E-03 -0.671607789864231051E-03 0.921717803397737075E-02 -0.666666666666666970E-02 -0.162064603090850004E-03 0.755721765255309031E-02 -0.421569022865129025E-11 -0.164615562771554003E-03 -0.622111903849669951E-09 0.822717714451166003E-02 -0.666666666666666970E-02 -0.435031767124851004E-03 0.921658398188585082E-02 -0.671474840199063022E-03 0.000000000000000000E+00 -0.435066618748741979E-03 -0.622858783402943975E-03 0.926582673394005067E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031759684571979E-03 0.921598447096053965E-02 -0.671353614127903012E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066626187581996E-03 0.000000000000000000E+00 -0.622806890458470035E-03 0.926515741526657936E-02 -0.435040444163285018E-03 -0.666666666666666970E-02 0.921687548937809992E-02 -0.671541060679635036E-03 -0.435057942848580980E-03 -0.622872964977058013E-03 0.926585833344844949E-02 -0.666666666666666970E-02 -0.162062561393839003E-03 0.755721764798701970E-02 -0.413722734407884995E-11 -0.164605394042161002E-03 -0.610702580324128951E-09 0.822717713158752952E-02 -0.435057935435147018E-03 0.926585952661160978E-02 -0.622874138897695035E-03 -0.435040451577369989E-03 -0.666666666666666970E-02 -0.671542323723484992E-03 0.921687685921447966E-02 -0.164358967946371006E-03 -0.666666666666666970E-02 0.822694291289794051E-02 -0.400842285147718976E-09 -0.164360426857548987E-03 -0.404409911340958016E-09 0.822717692852258965E-02 -0.435040431638602989E-03 -0.666666666666666970E-02 0.921687619006436934E-02 -0.671542172631628951E-03 -0.435057955372164984E-03 -0.622874107379275976E-03 0.926585945492748933E-02 -0.666666666666666970E-02 -0.162062560447055011E-03 0.755721764904289991E-02 -0.413719132263620016E-11 -0.164605389320235000E-03 
-0.610697341609479036E-09 0.822717713218053953E-02 -0.435057923606845009E-03 0.926585840260774063E-02 -0.622872995394380970E-03 -0.435040463406708008E-03 -0.666666666666666970E-02 -0.671541206497535000E-03 0.921687613516897035E-02 -0.164358974674477006E-03 -0.666666666666666970E-02 0.822694323093815967E-02 -0.400850494105121994E-09 -0.164360431601409999E-03 -0.404413311485434000E-09 0.822717692781809069E-02 -0.666666666666666970E-02 0.895231589357087069E-02 -0.601006644778351003E-03 -0.606288347917667949E-03 0.923179444181664033E-02 0.921580902911038960E-02 -0.671118539139058980E-03 -0.666666666666666970E-02 -0.628092211209406968E-03 0.927168309255030007E-02 -0.174566226528932001E-03 -0.666666666666666970E-02 0.823503972118425076E-02 -0.510771931197153037E-06 -0.167453071908171011E-03 -0.287656482013064974E-06 0.822679160086088948E-02 -0.666666666666666970E-02 -0.162064998742904011E-03 0.755721692802432000E-02 -0.423105196131728011E-11 -0.164617536966823991E-03 -0.624345265608089027E-09 0.822717661914844009E-02 -0.435027295434020991E-03 -0.666666666666666970E-02 0.921651153587084958E-02 -0.671532950972917025E-03 0.000000000000000000E+00 -0.435071089558342007E-03 -0.622941361344091053E-03 0.926589065346502053E-02 -0.666666666666666970E-02 -0.435031391217050999E-03 0.921594150620621937E-02 -0.671460176695444977E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066994590015989E-03 0.000000000000000000E+00 -0.622929213193418037E-03 0.926511191021490987E-02 -0.666666666666666970E-02 -0.162052976422443997E-03 0.755715649690166975E-02 -0.378464866461816970E-11 -0.164557847410658995E-03 -0.559380649971562964E-09 0.822711615636883935E-02 0.921569960071171035E-02 -0.671199549931674020E-03 -0.666666666666666970E-02 -0.624992484434852037E-03 0.926820615627817028E-02 -0.666666666666666970E-02 -0.170162251258758009E-03 0.823042260002626071E-02 -0.875293478015414056E-07 -0.166825903638244990E-03 -0.692727617230331017E-07 
0.822728043872173083E-02 -0.666666666666666970E-02 0.900314235025987016E-02 -0.616359389511348982E-03 -0.610140243511595988E-03 0.923851413586814982E-02 -0.162042399164606009E-03 -0.666666666666666970E-02 0.755717693475327010E-02 -0.343592691293592996E-11 -0.164505035934295996E-03 -0.508562477958249036E-09 0.822712657547360927E-02 -0.666666666666666970E-02 -0.435040441298520982E-03 0.921688002980029959E-02 -0.671545791530904035E-03 -0.435057945713094023E-03 -0.622877507553345035E-03 0.926586316856804065E-02 -0.666666666666666970E-02 -0.162131095725141989E-03 0.759120433096500003E-02 -0.393925786733449011E-11 0.000000000000000000E+00 -0.164599730797555013E-03 -0.563072851826958952E-09 0.822717559878123963E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319227572208993E-03 0.755721552651014965E-02 -0.125030561412552005E-10 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854118797604998E-03 0.000000000000000000E+00 -0.692387796644269027E-09 0.759120514631322994E-02 -0.666666666666666970E-02 -0.162099247780077998E-03 0.755721979757678023E-02 -0.581369082613002011E-11 -0.164789517015928995E-03 -0.853933169653901039E-09 0.822717846337374034E-02 0.926237298745190979E-02 -0.621979742880892045E-03 -0.666666666666666970E-02 -0.667643955617555022E-03 0.919963617413947950E-02 -0.666666666666666970E-02 0.920119114136109043E-02 -0.667964673290031966E-03 -0.622121226735743030E-03 0.926416149733888007E-02 -0.666666666666666970E-02 -0.435000220488568974E-03 0.921804320947046917E-02 -0.673963456108928050E-03 -0.435098154891607001E-03 -0.625438906427754982E-03 0.926833740265235045E-02 -0.167002100358977013E-03 0.822676745335870936E-02 -0.774836785822800982E-07 -0.162540956180643993E-03 -0.666666666666666970E-02 -0.563903073588569969E-09 0.755607876483108988E-02 -0.666666666666666970E-02 -0.162059390613440994E-03 0.755611424124089017E-02 -0.410936949933791987E-11 -0.164621823131188991E-03 -0.607895080715946992E-09 0.822719290698344964E-02 
-0.666666666666666970E-02 -0.162064679580902008E-03 0.755721760719274029E-02 -0.421866212845835019E-11 0.000000000000000000E+00 -0.164615944113866006E-03 -0.622543990942232019E-09 0.822717713446135049E-02 -0.666666666666666970E-02 -0.435031107052827001E-03 0.921656310812869056E-02 -0.671471001355917964E-03 0.000000000000000000E+00 -0.435067278703290996E-03 -0.622858834069494032E-03 0.926582569486444040E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031446660309002E-03 0.921599853918038045E-02 -0.671396777488897042E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066939156653987E-03 0.000000000000000000E+00 -0.622852608399671048E-03 0.926518020755455994E-02 -0.162542334394280010E-03 -0.666666666666666970E-02 0.755649428756813030E-02 -0.563245912569737047E-09 -0.167001746375183998E-03 -0.773653708482778048E-07 0.822676733536495917E-02 -0.666666666666666970E-02 -0.162063551455513005E-03 0.755721718231079035E-02 -0.417514955388785973E-11 -0.164610567026664999E-03 -0.616225367346680050E-09 0.822717731694085942E-02 -0.164377985077527987E-03 0.822719270254208997E-02 -0.401055733513956985E-09 -0.164351625080718007E-03 -0.666666666666666970E-02 -0.392176583167848991E-09 0.822661805239805963E-02 -0.435027796634762003E-03 -0.666666666666666970E-02 0.921803071428142946E-02 -0.673143999294995021E-03 -0.435070588466728980E-03 -0.624488956449440998E-03 0.926745798940319977E-02 -0.162061399017261011E-03 -0.666666666666666970E-02 0.755719884111734960E-02 -0.409719349842826014E-11 -0.164621105345924009E-03 -0.605582245755702968E-09 0.822719290467982013E-02 -0.666666666666666970E-02 -0.162080612318650990E-03 0.755719632784433975E-02 -0.488742792731292979E-11 -0.164695424088829993E-03 -0.719668653572314038E-09 0.822716417739921026E-02 -0.166686224033829990E-03 0.822673892793746031E-02 -0.487607625654436989E-07 -0.166616778531177993E-03 -0.666666666666666970E-02 -0.385465175249295009E-07 0.822046889213087079E-02 -0.434813594858741027E-03 
-0.666666666666666970E-02 0.920936888606463042E-02 -0.669875781881485994E-03 -0.435284514510405013E-03 -0.622560397625597029E-03 0.926509289165874939E-02 -0.666666666666666970E-02 0.927696108602655070E-02 -0.632627915671010992E-03 -0.671545420293352051E-03 0.921776198077606072E-02 0.922026582434420060E-02 -0.602408833914311001E-03 -0.666666666666666970E-02 -0.556545703137562042E-03 0.881922173549517033E-02 -0.163017068024577997E-03 -0.666666666666666970E-02 0.795594792411035041E-02 -0.273445418458471009E-11 0.000000000000000000E+00 -0.164269850502767994E-03 -0.212776837501969996E-09 0.822716867001030069E-02 -0.666666666666666970E-02 -0.435047888244456974E-03 0.922079054223712974E-02 -0.674652724066845955E-03 -0.435050499142537988E-03 -0.626174907785970051E-03 0.926933372330603945E-02 -0.317388785409780010E-03 -0.666666666666666970E-02 0.807555137448344972E-02 -0.161231757480141994E-03 0.000000000000000000E+00 -0.506658477841221994E-03 -0.311775274478785975E-03 0.895372834363604084E-02 -0.666666666666666970E-02 -0.163033771455105996E-03 0.757711880141135989E-02 -0.230783976059587012E-04 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.528851957736768035E-03 0.000000000000000000E+00 -0.189450184417205001E-03 0.832713861616275926E-02 -0.666666666666666970E-02 -0.161988418302652995E-03 0.755983344534295974E-02 -0.201738904538359992E-11 -0.164212922771786994E-03 -0.300297007087386020E-09 0.822710829050683073E-02 0.921859794089704976E-02 -0.671461758609938038E-03 -0.666666666666666970E-02 -0.645287728552976008E-03 0.929336166647132082E-02 -0.666666666666666970E-02 -0.192231196795263987E-03 0.825469366452986041E-02 -0.211365722510570006E-06 0.000000000000000000E+00 -0.166616012173876997E-03 -0.134858265397606994E-07 0.822777128965413058E-02 -0.666666666666666970E-02 0.902089049924758961E-02 -0.621512175798593952E-03 -0.611398199502805962E-03 0.924071304560030934E-02 -0.162013975003999001E-03 -0.666666666666666970E-02 0.758489414663056968E-02 
-0.176583073381748006E-05 -0.529428370961672052E-03 -0.381594181184865010E-04 0.875176256452839946E-02 -0.666666666666666970E-02 -0.435042623232502024E-03 0.921694787103495040E-02 -0.671556689509785008E-03 -0.435055763946669001E-03 -0.622875549110628985E-03 0.926586538965971929E-02 -0.666666666666666970E-02 -0.435049192429825995E-03 0.921716878108901072E-02 -0.671606874278178004E-03 0.000000000000000000E+00 -0.435049194965705000E-03 -0.622886316419352971E-03 0.926588938457374028E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.162062558982806990E-03 0.755721781610755976E-02 -0.413711682785996024E-11 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605380148779006E-03 0.000000000000000000E+00 -0.610686395487684964E-09 0.822717709924931928E-02 -0.666666666666666970E-02 -0.435040485643667027E-03 0.921687647065858040E-02 -0.671540876851966020E-03 -0.435057901371828989E-03 -0.622872543909110035E-03 0.926585805994004953E-02 0.822717712936564018E-02 -0.606929319904127040E-09 -0.666666666666666970E-02 -0.411128314368110998E-11 0.755721765744662034E-02 -0.666666666666666970E-02 0.755721765744662034E-02 -0.411128314368110998E-11 -0.606929319768601975E-09 0.822717712936562977E-02 -0.666666666666666970E-02 -0.162062535498602012E-03 0.755721765388590004E-02 -0.413624192489096004E-11 -0.164605265022377999E-03 -0.610559256318664035E-09 0.822717713308667060E-02 -0.164360427767194014E-03 0.822717693070432014E-02 -0.404403342469463976E-09 -0.164359021906931011E-03 -0.666666666666666970E-02 -0.400970042731464983E-09 0.822695176085696048E-02 -0.666666666666666970E-02 -0.435040781993407998E-03 0.921688642847521070E-02 -0.671543134942478989E-03 -0.435057605047514022E-03 -0.622873024233365990E-03 0.926585913807351051E-02 -0.666666666666666970E-02 -0.435040480200654992E-03 0.921687629983308949E-02 -0.671540847971295054E-03 0.000000000000000000E+00 -0.435057906814365980E-03 -0.622872547165051992E-03 0.926585805272850914E-02 -0.666666666666666970E-02 
-0.435049192316071991E-03 0.921716878067056940E-02 -0.671606877003452980E-03 0.000000000000000000E+00 -0.435049195079459979E-03 -0.622886319684859977E-03 0.926588938770844979E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.162062559106629004E-03 0.755721765243790033E-02 -0.413713440957626983E-11 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605382367111999E-03 0.000000000000000000E+00 -0.610689040449987030E-09 0.822717709614393007E-02 -0.162062562108060003E-03 -0.666666666666666970E-02 0.755721765207954983E-02 -0.413725467961124014E-11 -0.164605397579012993E-03 -0.610706551944964963E-09 0.822717713458392085E-02 -0.666666666666666970E-02 -0.162062001816588988E-03 0.755721765239816996E-02 -0.411599093120005983E-11 -0.164602607009130996E-03 -0.607614028771139961E-09 0.822717713005806026E-02 -0.435050784089093976E-03 0.926588367178806001E-02 -0.622883807627942028E-03 -0.435047603293761014E-03 -0.666666666666666970E-02 -0.671594832438412981E-03 0.921711542063954024E-02 -0.435047603293765026E-03 -0.666666666666666970E-02 0.921711542225988993E-02 -0.671594833533352984E-03 -0.435050784089088989E-03 -0.622883808601936025E-03 0.926588367352939972E-02 -0.435040480241351984E-03 -0.666666666666666970E-02 0.921687630121104974E-02 -0.671540848264394041E-03 -0.435057906773672024E-03 -0.622872547211143052E-03 0.926585805288993036E-02 -0.666666666666666970E-02 -0.435040480209677994E-03 0.921687629817317076E-02 -0.671540845217228006E-03 -0.435057906805342979E-03 -0.622872544373526951E-03 0.926585805077998967E-02 -0.164605397577960991E-03 0.822717713458392085E-02 -0.610706548612634985E-09 -0.162062562110996998E-03 -0.666666666666666970E-02 -0.413725463469866987E-11 0.755721765366132014E-02 -0.162062559104681993E-03 -0.666666666666666970E-02 0.755721765355718989E-02 -0.413713996767741984E-11 -0.164605382596239995E-03 -0.610689879340446991E-09 0.822717713313662023E-02 -0.666666666666666970E-02 0.755721765237517013E-02 -0.411128320041531016E-11 
-0.606929330558691957E-09 0.822717712936565058E-02 0.822717692676106990E-02 -0.401916224886744012E-09 -0.666666666666666970E-02 -0.398503993612473003E-09 0.822695176179774959E-02 -0.435040782183991983E-03 -0.666666666666666970E-02 0.921688643492086027E-02 -0.671543136482271046E-03 -0.435057604856944999E-03 -0.622873024631908950E-03 0.926585913880649016E-02 -0.666666666666666970E-02 -0.162064603069654990E-03 0.755721765252322011E-02 -0.421568940485412024E-11 -0.164615562666129003E-03 -0.622111784075274967E-09 0.822717714448810943E-02 -0.435031767248445010E-03 -0.666666666666666970E-02 0.921658397562742995E-02 -0.671474830568459958E-03 0.000000000000000000E+00 -0.435066618625169982E-03 -0.622858773500244004E-03 0.926582672349660075E-02 -0.666666666666666970E-02 -0.435031759841376020E-03 0.921598447242718069E-02 -0.671353608099204964E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066626030805982E-03 0.000000000000000000E+00 -0.622806883368841996E-03 0.926515741215891940E-02 -0.666666666666666970E-02 -0.162062535484632989E-03 0.755721765241007971E-02 -0.413624141658310988E-11 -0.164605264966721999E-03 -0.610559194886172028E-09 0.822717713298093921E-02 0.822717692676106990E-02 -0.401916224750255024E-09 -0.666666666666666970E-02 -0.398503993629537016E-09 0.822695176179893961E-02 -0.666666666666666970E-02 -0.435040782184036978E-03 0.921688643492233999E-02 -0.671543136482596957E-03 -0.435057604856900980E-03 -0.622873024631969015E-03 0.926585913880663935E-02 -0.666666666666666970E-02 0.755721765237517013E-02 -0.411128320041521968E-11 -0.606929330558677998E-09 0.822717712936565058E-02 -0.162062535484634994E-03 -0.666666666666666970E-02 0.755721765240914990E-02 -0.413624147093784990E-11 -0.164605264966739996E-03 -0.610559194907676036E-09 0.822717713298093921E-02 -0.666666666666666970E-02 -0.435040482273178026E-03 0.921687613607913986E-02 -0.671540613305316998E-03 -0.435057904742022978E-03 -0.622872309434753979E-03 
0.926585781733488961E-02 -0.666666666666666970E-02 -0.162127387857516993E-03 0.759121301019225012E-02 -0.381918248612579987E-11 0.000000000000000000E+00 -0.164583146079131992E-03 -0.546169842951880971E-09 0.822717717071064919E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319269560149001E-03 0.755721776272199035E-02 -0.125468739914664006E-10 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854266156419006E-03 0.000000000000000000E+00 -0.694836199054006039E-09 0.759121369204142967E-02 -0.666666666666666970E-02 -0.162091523337547997E-03 0.755721898424006021E-02 -0.540934677579967032E-11 0.000000000000000000E+00 -0.164750533637444990E-03 -0.795365337200214006E-09 0.822717800588011056E-02 0.926452030343002941E-02 -0.622281047419194033E-03 -0.666666666666666970E-02 -0.668721216430620004E-03 0.920448407564962955E-02 -0.666666666666666970E-02 0.920291934243665927E-02 -0.668399806909403954E-03 -0.622140086487220031E-03 0.926273191293569033E-02 -0.666666666666666970E-02 -0.435042276336174984E-03 0.921682348368780928E-02 -0.671431373639223947E-03 -0.435056110819545018E-03 -0.622756694074473051E-03 0.926574695221287978E-02 -0.164604656270789013E-03 0.822717725536971020E-02 -0.609640574535708954E-09 -0.162062479874466994E-03 -0.666666666666666970E-02 -0.413006085521291964E-11 0.755726759055258979E-02 -0.666666666666666970E-02 -0.162006162226876999E-03 0.755727139187646006E-02 -0.248123275689687997E-11 -0.164398193105456003E-03 -0.370429927246309000E-09 0.822722962912524058E-02 -0.666666666666666970E-02 -0.162064599546255997E-03 0.755721765602699984E-02 -0.421555276492109033E-11 0.000000000000000000E+00 -0.164615545095072007E-03 -0.622091901725074988E-09 0.822717714627387020E-02 -0.666666666666666970E-02 -0.435031818763029980E-03 0.921658104872762930E-02 -0.671470214801774966E-03 0.000000000000000000E+00 -0.435066567119568007E-03 -0.622854034507566018E-03 0.926582206155607963E-02 -0.666666666666666970E-02 -0.435031752461292008E-03 0.921598570305219057E-02 
-0.671353593598220024E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066633409600010E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.622806719992244047E-03 0.926515905834469043E-02 -0.162062393641909007E-03 -0.666666666666666970E-02 0.755722114123722009E-02 -0.413058279052571960E-11 -0.164604687177714008E-03 -0.609739841581688048E-09 0.822717725546858077E-02 -0.666666666666666970E-02 -0.162060731985976013E-03 0.755721979203457985E-02 -0.406827649258389981E-11 -0.164598252833624004E-03 -0.600736958280085992E-09 0.822717859850459997E-02 -0.164089877345694000E-03 0.822722950591394005E-02 -0.245723983761991010E-09 -0.164740360809531990E-03 -0.666666666666666970E-02 -0.253032242462855987E-09 0.822760867483479999E-02 -0.435063218163868012E-03 -0.666666666666666970E-02 0.926625279535238031E-02 -0.623222898203098044E-03 -0.435035168246062981E-03 -0.671596115313023018E-03 0.921718087666609925E-02 -0.162006191893605003E-03 -0.666666666666666970E-02 0.755728897650767041E-02 -0.248111433259074991E-11 -0.164398182179902004E-03 -0.370407208686471976E-09 0.822722962910260938E-02 -0.666666666666666970E-02 -0.162062408593543990E-03 0.755721778752635031E-02 -0.413141811987293010E-11 -0.164604734980651994E-03 -0.609861061658230019E-09 0.822717721123178945E-02 -0.164359758086323008E-03 0.822717705188595952E-02 -0.403734889040530024E-09 -0.164358443027807014E-03 -0.666666666666666970E-02 -0.400953602691749013E-09 0.822699461577017926E-02 -0.435042369262157974E-03 -0.666666666666666970E-02 0.921678605589727924E-02 -0.671388823848062012E-03 -0.435056017899960990E-03 -0.622715238128926981E-03 0.926570506586136926E-02 -0.666666666666666970E-02 0.897119442339463016E-02 -0.606814776206859971E-03 -0.607757345455318975E-03 0.923427126754335081E-02 0.921865858631707932E-02 -0.671454241685535990E-03 -0.666666666666666970E-02 -0.646212508082314027E-03 0.929466349621911950E-02 -0.192798550395747987E-03 
-0.666666666666666970E-02 0.825745095197302943E-02 -0.932632147974648003E-06 0.000000000000000000E+00 -0.167240534661016013E-03 -0.225014932097518994E-07 0.822685297960725967E-02 -0.666666666666666970E-02 -0.435853333045042000E-03 0.921982672920109922E-02 -0.576029289608052963E-03 -0.434241817042410974E-03 -0.604491260295870981E-03 0.915701687929209966E-02 -0.413213785007987978E-03 -0.666666666666666970E-02 0.877723644056935953E-02 -0.541755128362727992E-03 0.000000000000000000E+00 -0.454683425554934008E-03 -0.590990666065788033E-03 0.920776909864309996E-02 -0.666666666666666970E-02 -0.163098438357499011E-03 0.763192860819197037E-02 -0.471912218750803974E-04 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.530165770547245995E-03 0.000000000000000000E+00 -0.266001580375158011E-03 0.861802239077489977E-02 -0.666666666666666970E-02 -0.162004212070115012E-03 0.757730725874160033E-02 -0.176948864124387004E-05 -0.529438965863178955E-03 -0.405266342026753998E-04 0.875391878258711953E-02 0.921979766533967057E-02 -0.602143233936805987E-03 -0.666666666666666970E-02 -0.555336769390312010E-03 0.881585236637339939E-02 -0.666666666666666970E-02 -0.163010868918803012E-03 0.795402864518519014E-02 -0.273140611218113010E-11 0.000000000000000000E+00 -0.164270868404786012E-03 -0.213417891562774994E-09 0.822716826171294990E-02 -0.666666666666666970E-02 0.927693305629184034E-02 -0.632604555455441018E-03 -0.671545581847466972E-03 0.921776054189130979E-02 -0.161978982117798992E-03 -0.666666666666666970E-02 0.755685740523827972E-02 -0.195386055106566009E-11 -0.164191968508161013E-03 -0.291698868024501998E-09 0.822713227244221967E-02 -0.666666666666666970E-02 -0.435040480198179975E-03 0.921687629980929950E-02 -0.671540848019947000E-03 -0.435057906816840021E-03 -0.622872547226495030E-03 0.926585805278096024E-02 -0.666666666666666970E-02 -0.435049192309293017E-03 0.921716878045670054E-02 -0.671606876968350959E-03 0.000000000000000000E+00 -0.435049195086237978E-03 
-0.622886319690002022E-03 0.926588938769827043E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.162062559109026988E-03 0.755721765240835973E-02 -0.413713441548758037E-11 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605382377888996E-03 0.000000000000000000E+00 -0.610689049049815970E-09 0.822717709592826925E-02 -0.666666666666666970E-02 -0.435040480192435005E-03 0.921687628880354937E-02 -0.671540836694003019E-03 -0.435057906822584992E-03 -0.622872536396895009E-03 0.926585804147755931E-02 0.822717712936564018E-02 -0.606929320743386971E-09 -0.666666666666666970E-02 -0.411128314894905010E-11 0.755721765697572965E-02 -0.666666666666666970E-02 0.755721765697572965E-02 -0.411128314894905010E-11 -0.606929320770492025E-09 0.822717712936564018E-02 -0.666666666666666970E-02 -0.162062535444674989E-03 0.755721765241194020E-02 -0.413623989629973001E-11 -0.164605264767690994E-03 -0.610558973788768004E-09 0.822717713298089064E-02 -0.164360410611332999E-03 0.822717692931345009E-02 -0.404391742677632997E-09 -0.164359006864496997E-03 -0.666666666666666970E-02 -0.400958843987352986E-09 0.822695177799862010E-02 -0.666666666666666970E-02 -0.435040782694775998E-03 0.921688645197418083E-02 -0.671543140259736040E-03 -0.435057604346205003E-03 -0.622873025349945041E-03 0.926585914054997052E-02 -0.666666666666666970E-02 -0.435040480198194016E-03 0.921687629980919021E-02 -0.671540848019547038E-03 0.000000000000000000E+00 -0.435057906816825981E-03 -0.622872547226044002E-03 0.926585805278038951E-02 -0.666666666666666970E-02 -0.435049192309309985E-03 0.921716878047955032E-02 -0.671606876991275004E-03 0.000000000000000000E+00 -0.435049195086221010E-03 -0.622886319711830048E-03 0.926588938772157991E-02 -0.666666666666666970E-02 -0.162062559108390995E-03 0.755721765240854968E-02 -0.413713449972606017E-11 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605382374721012E-03 0.000000000000000000E+00 -0.610689045533098955E-09 0.822717709592844966E-02 
-0.162062558941643005E-03 -0.666666666666666970E-02 0.755721765259115014E-02 -0.413713398632515962E-11 -0.164605381794141997E-03 -0.610688990283018023E-09 0.822717713318714926E-02 -0.666666666666666970E-02 -0.162062001821220998E-03 0.755721765238152009E-02 -0.411599110620314974E-11 -0.164602607032291994E-03 -0.607614054224395964E-09 0.822717713004737956E-02 -0.435050784143862991E-03 0.926588367156189023E-02 -0.622883807550479035E-03 -0.435047603238991024E-03 -0.666666666666666970E-02 -0.671594832028562985E-03 0.921711541877510976E-02 -0.435047603238989018E-03 -0.666666666666666970E-02 0.921711541842836976E-02 -0.671594831671507020E-03 -0.435050784143864997E-03 -0.622883807208665977E-03 0.926588367119991069E-02 -0.435040480204769973E-03 -0.666666666666666970E-02 0.921687629995552975E-02 -0.671540847991965043E-03 -0.435057906810250024E-03 -0.622872547162217020E-03 0.926585805272642921E-02 -0.666666666666666970E-02 -0.435040480200663991E-03 0.921687628816156984E-02 -0.671540835793514026E-03 -0.435057906814356981E-03 -0.622872535485858030E-03 0.926585804054953083E-02 -0.164605381790940998E-03 0.822717713318714058E-02 -0.610688980014658047E-09 -0.162062558950573009E-03 -0.666666666666666970E-02 -0.413713385087310001E-11 0.755721765740006036E-02 -0.162062559115579987E-03 -0.666666666666666970E-02 0.755721765739483971E-02 -0.413714011819061037E-11 -0.164605382612299008E-03 -0.610689891393699981E-09 0.822717713311063928E-02 -0.666666666666666970E-02 0.755721765237515972E-02 -0.411128320041545960E-11 -0.606929330558715014E-09 0.822717712936565058E-02 0.822717692676106990E-02 -0.401916224751758011E-09 -0.666666666666666970E-02 -0.398503993602972989E-09 0.822695176179707999E-02 -0.435040782183968022E-03 -0.666666666666666970E-02 0.921688643492003974E-02 -0.671543136482089984E-03 -0.435057604856969990E-03 -0.622873024631875991E-03 0.926585913880641036E-02 -0.666666666666666970E-02 -0.162064603069938997E-03 0.755721765252195983E-02 -0.421568947004117974E-11 
-0.164615562667550012E-03 -0.622111785671113991E-09 0.822717714448712063E-02 -0.435031767249090978E-03 -0.666666666666666970E-02 0.921658397565005075E-02 -0.671474830574493001E-03 0.000000000000000000E+00 -0.435066618624524015E-03 -0.622858773502375004E-03 0.926582672349991927E-02 -0.666666666666666970E-02 -0.435031759837511002E-03 0.921598447232290993E-02 -0.671353608077845964E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066626034670025E-03 0.000000000000000000E+00 -0.622806883367964985E-03 0.926515741217292035E-02 -0.666666666666666970E-02 -0.162062535484635997E-03 0.755721765240869019E-02 -0.413624141680835988E-11 -0.164605264966749998E-03 -0.610559194919630036E-09 0.822717713298093921E-02 0.822717692676106990E-02 -0.401916224778863996E-09 -0.666666666666666970E-02 -0.398503993603033011E-09 0.822695176179709040E-02 -0.666666666666666970E-02 -0.435040782183968022E-03 0.921688643492003974E-02 -0.671543136482092044E-03 -0.435057604856969990E-03 -0.622873024631875991E-03 0.926585913880641036E-02 -0.666666666666666970E-02 0.755721765237515972E-02 -0.411128320041545960E-11 -0.606929330558716048E-09 0.822717712936565058E-02 -0.162062535484635997E-03 -0.666666666666666970E-02 0.755721765240869019E-02 -0.413624144391367996E-11 -0.164605264966749998E-03 -0.610559194919673050E-09 0.822717713298093921E-02 -0.666666666666666970E-02 -0.162064603069936991E-03 0.755721765251413016E-02 -0.421568947063007983E-11 -0.164615562667617992E-03 -0.622111785760815008E-09 0.822717714448719002E-02 -0.666666666666666970E-02 -0.435031767248966023E-03 0.921658397564588047E-02 -0.671474830573548986E-03 0.000000000000000000E+00 -0.435066618624647993E-03 -0.622858773502177028E-03 0.926582672349948039E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031759837441992E-03 0.921598447233066935E-02 -0.671353608091401961E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066626034738980E-03 0.000000000000000000E+00 -0.622806883381814042E-03 
0.926515741218287940E-02 -0.666666666666666970E-02 -0.162062001821317004E-03 0.755721765238150968E-02 -0.411599108277345033E-11 -0.164602607032771997E-03 -0.607614054759129973E-09 0.822717713004762069E-02 0.926588367541716050E-02 -0.622883811249623975E-03 -0.666666666666666970E-02 -0.671594835878273032E-03 0.921711542243204042E-02 -0.666666666666666970E-02 0.921711542243204042E-02 -0.671594835878273032E-03 -0.622883811249622999E-03 0.926588367541716050E-02 -0.666666666666666970E-02 -0.162062535450388002E-03 0.755721765240867979E-02 -0.413624016808590973E-11 -0.164605264796174991E-03 -0.610559005432624996E-09 0.822717713298075012E-02 -0.164360411311726003E-03 0.822717692923711011E-02 -0.404392344956488014E-09 -0.164359007643746004E-03 -0.666666666666666970E-02 -0.400959412732088008E-09 0.822695177596773942E-02 -0.666666666666666970E-02 -0.435040782621792983E-03 0.921688644954029961E-02 -0.671543139722759002E-03 -0.435057604419182000E-03 -0.622873025249980950E-03 0.926585914030375081E-02 -0.666666666666666970E-02 -0.435040480198179975E-03 0.921687629980933072E-02 -0.671540848019966949E-03 0.000000000000000000E+00 -0.435057906816840021E-03 -0.622872547226514003E-03 0.926585805278100014E-02 -0.666666666666666970E-02 -0.435049192309251980E-03 0.921716878047779999E-02 -0.671606876990949960E-03 0.000000000000000000E+00 -0.435049195086279015E-03 -0.622886319711839047E-03 0.926588938772160073E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.162062559109029997E-03 0.755721765240835019E-02 -0.413713457806971004E-11 -0.666666666666666970E-02 0.000000000000000000E+00 -0.164605382377896992E-03 0.000000000000000000E+00 -0.610689049042042024E-09 0.822717709592721974E-02 -0.162062559106823997E-03 -0.666666666666666970E-02 0.755721765240988975E-02 -0.413714016632381966E-11 -0.164605382618112988E-03 -0.610689904846968049E-09 0.822717713311111980E-02 -0.666666666666666970E-02 -0.162062001821342998E-03 0.755721765238148973E-02 -0.411599100244394014E-11 
-0.164602607032901993E-03 -0.607614054902494021E-09 0.822717713004762069E-02 -0.435050784145525995E-03 0.926588367157234020E-02 -0.622883807563274030E-03 -0.435047603237328020E-03 -0.666666666666666970E-02 -0.671594832032081004E-03 0.921711541873500989E-02 -0.435047603237328020E-03 -0.666666666666666970E-02 0.921711541879069972E-02 -0.671594832090159004E-03 -0.435050784145525995E-03 -0.622883807618967002E-03 0.926588367163041007E-02 -0.435040480198147991E-03 -0.666666666666666970E-02 0.921687629974903000E-02 -0.671540847957907000E-03 -0.435057906816872005E-03 -0.622872547167174968E-03 0.926585805271907051E-02 -0.666666666666666970E-02 -0.435040480192394998E-03 0.921687628872891983E-02 -0.671540836617235976E-03 -0.435057906822624999E-03 -0.622872536323500051E-03 0.926585804140095046E-02 -0.164605382615034992E-03 0.822717713311110939E-02 -0.610689894920461967E-09 -0.162062559115410987E-03 -0.666666666666666970E-02 -0.413714014137465006E-11 0.755721765703380993E-02 -0.162062559115412993E-03 -0.666666666666666970E-02 0.755721765703380993E-02 -0.413714014146466007E-11 -0.164605382615047000E-03 -0.610689894960657982E-09 0.822717713311110939E-02 -0.666666666666666970E-02 0.921687630349792009E-02 -0.671540851815856984E-03 -0.622872550856052947E-03 0.926585805656933957E-02 0.926585809685315925E-02 -0.622872568564181964E-03 -0.666666666666666970E-02 -0.671540936702172951E-03 0.921687667944103006E-02 -0.164358960784040002E-03 -0.666666666666666970E-02 0.822694398364712948E-02 -0.400847403514244019E-09 0.000000000000000000E+00 -0.164360413019343991E-03 -0.404398689239843015E-09 0.822717692924298041E-02 -0.666666666666666970E-02 -0.162064603069942006E-03 0.755721765251538002E-02 -0.421568947070519009E-11 -0.164615562667628997E-03 -0.622111785771050024E-09 0.822717714448712931E-02 -0.435031767249002019E-03 -0.666666666666666970E-02 0.921658397564648069E-02 -0.671474830573193042E-03 0.000000000000000000E+00 -0.435066618624612973E-03 -0.622858773501632000E-03 
0.926582672349898079E-02 -0.666666666666666970E-02 -0.435031759837399004E-03 0.921598447231553042E-02 -0.671353608075763971E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435066626034782023E-03 0.000000000000000000E+00 -0.622806883366930982E-03 0.926515741216851069E-02 -0.666666666666666970E-02 -0.162062558230456009E-03 0.755721765240984032E-02 -0.413710687281216962E-11 -0.164605378253319007E-03 -0.610685055063975992E-09 0.822717713310629033E-02 0.926585809685315925E-02 -0.622872568564181964E-03 -0.666666666666666970E-02 -0.671540936702172951E-03 0.921687667944103006E-02 -0.666666666666666970E-02 -0.164358960784040002E-03 0.822694398364712948E-02 -0.400847403514244019E-09 0.000000000000000000E+00 -0.164360413019343991E-03 -0.404398689239843015E-09 0.822717692924298041E-02 -0.666666666666666970E-02 0.921687630349792009E-02 -0.671540851815856984E-03 -0.622872550856052947E-03 0.926585805656933957E-02 -0.162062558230456009E-03 -0.666666666666666970E-02 0.755721765240984032E-02 -0.413710687281216962E-11 0.000000000000000000E+00 -0.164605378253319007E-03 -0.610685055063975992E-09 0.822717713310629033E-02 -0.666666666666666970E-02 -0.162064604275187002E-03 0.755721677996594975E-02 -0.421580947944237963E-11 -0.164615577356503002E-03 -0.622129690587314019E-09 0.822717714566923060E-02 -0.666666666666666970E-02 -0.435031753444996012E-03 0.921658351635084964E-02 -0.671474729715682982E-03 0.000000000000000000E+00 -0.435066632426207010E-03 -0.622858755231564033E-03 0.926582667773451045E-02 -0.666666666666666970E-02 -0.435031736935309978E-03 0.921598323977216964E-02 -0.671353347083358007E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066648932866005E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.622806813899395959E-03 0.926515680964767017E-02 -0.666666666666666970E-02 -0.162062535483415999E-03 0.755721764896765975E-02 -0.413624165743050033E-11 -0.164605264995012004E-03 -0.610559231671319039E-09 
0.822717713301475070E-02 0.822717692676106990E-02 -0.401916226534076012E-09 -0.666666666666666970E-02 -0.398503963674476984E-09 0.822695175970213945E-02 -0.666666666666666970E-02 0.921688643582909035E-02 -0.671543139638232974E-03 -0.622873028116618000E-03 0.926585914228755976E-02 -0.666666666666666970E-02 -0.162062558230619994E-03 0.755721765087292996E-02 -0.413710692525393008E-11 -0.164605378269444997E-03 -0.610685075298003985E-09 0.822717713311651999E-02 -0.435057895657227020E-03 0.926585809286093083E-02 -0.622872564813008981E-03 -0.435040491358768000E-03 -0.666666666666666970E-02 -0.671540932535798999E-03 0.921687667431765958E-02 -0.666666666666666970E-02 -0.164358960778226998E-03 0.822694398251594058E-02 -0.400847388083247026E-09 -0.164360413020582990E-03 -0.404398690916013012E-09 0.822717692924319031E-02 -0.666666666666666970E-02 -0.435040480197697993E-03 0.921687629888584027E-02 -0.671540847069607041E-03 0.000000000000000000E+00 -0.435057906817322003E-03 -0.622872546317801987E-03 0.926585805183252967E-02 -0.666666666666666970E-02 -0.162127546311873996E-03 0.759121249955739975E-02 -0.382423929799540036E-11 0.000000000000000000E+00 -0.164583726855931989E-03 -0.546878175870651968E-09 0.822717706955712960E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319285307283012E-03 0.755721766126241958E-02 -0.125511383854575001E-10 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854277196956007E-03 0.000000000000000000E+00 -0.695070274241472012E-09 0.759121319033947018E-02 -0.435040480198167995E-03 -0.666666666666666970E-02 0.921687629975386988E-02 -0.671540847960958053E-03 -0.435057906816852002E-03 -0.622872547169800038E-03 0.926585805272362069E-02 -0.666666666666666970E-02 -0.435040480192397004E-03 0.921687628872910024E-02 -0.671540836617267960E-03 -0.435057906822622017E-03 -0.622872536323503955E-03 0.926585804140104934E-02 -0.164605382615351010E-03 0.822717713311131062E-02 -0.610689895410060039E-09 -0.162062559115474006E-03 
-0.666666666666666970E-02 -0.413714014380830024E-11 0.755721765703380993E-02 -0.162062559115409008E-03 -0.666666666666666970E-02 0.755721765703380038E-02 -0.413714008712550966E-11 -0.164605382615029002E-03 -0.610689894941937954E-09 0.822717713311117010E-02 -0.162062559106887993E-03 -0.666666666666666970E-02 0.755721765240996001E-02 -0.413714022296702998E-11 -0.164605382618428003E-03 -0.610689905200928972E-09 0.822717713311131929E-02 -0.666666666666666970E-02 -0.162062001821339013E-03 0.755721765238150968E-02 -0.411599102940498021E-11 -0.164602607032881989E-03 -0.607614054881568006E-09 0.822717713004766059E-02 -0.435050784145494011E-03 0.926588367157693028E-02 -0.622883807565917966E-03 -0.435047603237360004E-03 -0.666666666666666970E-02 -0.671594832035220962E-03 0.921711541874025049E-02 -0.435047603237360004E-03 -0.666666666666666970E-02 0.921711541879170933E-02 -0.671594832090262003E-03 -0.435050784145494011E-03 -0.622883807618874953E-03 0.926588367163047079E-02 -0.666666666666666970E-02 0.921687630349792009E-02 -0.671540851815856984E-03 -0.622872550856052947E-03 0.926585805656933957E-02 0.822717712936564018E-02 -0.606929320872887989E-09 -0.666666666666666970E-02 -0.411128314877483972E-11 0.755721765699129966E-02 -0.162062559115238003E-03 -0.666666666666666970E-02 0.755721765701823991E-02 -0.413714005475985968E-11 -0.164605382614328987E-03 -0.610689894184682021E-09 0.822717713311110939E-02 -0.666666666666666970E-02 -0.435040480197263987E-03 0.921687629887128941E-02 -0.671540847066263036E-03 -0.435057906817756009E-03 -0.622872546317053020E-03 0.926585805183098923E-02 -0.162127546311872993E-03 -0.666666666666666970E-02 0.759121249955737026E-02 -0.382423935215729017E-11 0.000000000000000000E+00 -0.164583726855924996E-03 -0.546878175863874980E-09 0.822717706955712960E-02 -0.666666666666666970E-02 -0.166319285307283988E-03 0.755721766126244039E-02 -0.125511383583243006E-10 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 
-0.166854277196956007E-03 0.000000000000000000E+00 -0.695070274239683027E-09 0.759121319033943982E-02 -0.666666666666666970E-02 -0.162062001821339989E-03 0.755721765238148973E-02 -0.411599111074019016E-11 -0.164602607032884998E-03 -0.607614054884462001E-09 0.822717713004762069E-02 0.926588367541610926E-02 -0.622883811249163948E-03 -0.666666666666666970E-02 -0.671594835876070041E-03 0.921711542242228087E-02 -0.666666666666666970E-02 -0.435047603237369979E-03 0.921711541881043914E-02 -0.671594832109593978E-03 -0.435050784145483982E-03 -0.622883807637366021E-03 0.926588367164967938E-02 -0.666666666666666970E-02 0.755721765237515972E-02 -0.411128320041545960E-11 -0.606929330558716048E-09 0.822717712936565058E-02 -0.435040480192415977E-03 -0.666666666666666970E-02 0.921687628876629965E-02 -0.671540836655632996E-03 -0.435057906822602990E-03 -0.622872536360199970E-03 0.926585804143926009E-02 -0.666666666666666970E-02 -0.162816160491764011E-03 0.755629729934281964E-02 -0.145267310755072998E-07 -0.168362009615544009E-03 -0.190258328993087994E-05 0.822792910352069043E-02 -0.666666666666666970E-02 -0.434980287163910990E-03 0.921591582112040938E-02 -0.672220540750253994E-03 0.000000000000000000E+00 -0.435118076441907997E-03 -0.623871210047872993E-03 0.926674480647901068E-02 -0.666666666666666970E-02 -0.391663109199982980E-03 0.857592364724167958E-02 -0.270377370428809022E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.470345934721972021E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.290023568862184014E-03 0.893922648671382006E-02 -0.666666666666666970E-02 -0.317994853278870024E-03 0.807402136254166923E-02 -0.163000017519030990E-03 -0.506466269753455046E-03 -0.315285081402617016E-03 0.895741238458851001E-02 0.822721524635505914E-02 -0.564092130791223009E-09 -0.666666666666666970E-02 -0.389036333446409034E-11 0.758050134032871043E-02 -0.666666666666666970E-02 0.758050134032871043E-02 -0.389036336156913981E-11 -0.564092130737013004E-09 
0.822721524635505914E-02 -0.666666666666666970E-02 -0.162062221237022010E-03 0.755675930893081986E-02 -0.416186344820719000E-11 -0.164608243083756012E-03 -0.614516634575204050E-09 0.822717669815483006E-02 -0.435064268078357019E-03 0.926583720685853970E-02 -0.622864535863296978E-03 -0.435034118178185007E-03 -0.666666666666666970E-02 -0.671494763562757962E-03 0.921666478489410919E-02 -0.666666666666666970E-02 -0.435034118314003989E-03 0.921666821285587953E-02 -0.671498187612958996E-03 -0.435064267942557987E-03 -0.622867800074395988E-03 0.926584079408090955E-02 -0.666666666666666970E-02 -0.162064603060627001E-03 0.755721765249593985E-02 -0.421568916772846982E-11 0.000000000000000000E+00 -0.164615562621545010E-03 -0.622111733862203048E-09 0.822717714450407062E-02 -0.666666666666666970E-02 -0.435031767219332013E-03 0.921658395702201066E-02 -0.671474812089471003E-03 0.000000000000000000E+00 -0.435066618654277992E-03 -0.622858755959619019E-03 0.926582670497335011E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031759978188998E-03 0.921598448078022985E-02 -0.671353613199229967E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066625894017019E-03 0.000000000000000000E+00 -0.622806887499908037E-03 0.926515741657525996E-02 -0.435040451653885985E-03 -0.666666666666666970E-02 0.921687738713831972E-02 -0.671542772385395025E-03 -0.435057935358636985E-03 -0.622874555977533041E-03 0.926586008373840933E-02 -0.666666666666666970E-02 -0.162062556280765003E-03 0.755721766011261970E-02 -0.413703259232341988E-11 -0.164605370112559998E-03 -0.610674307777927029E-09 0.822717713673958018E-02 -0.435057871981406005E-03 0.926586389913170057E-02 -0.622877920215035986E-03 -0.435040515036651005E-03 -0.666666666666666970E-02 -0.671546676151164976E-03 0.921688293990952957E-02 -0.164358975070530991E-03 -0.666666666666666970E-02 0.822694462180378026E-02 -0.400864471382896996E-09 -0.164360425623554987E-03 -0.404406184767255019E-09 0.822717693084428978E-02 
-0.435040521728764014E-03 -0.666666666666666970E-02 0.921688316448704067E-02 -0.671546726860146044E-03 -0.435057865289874995E-03 -0.622877930792390950E-03 0.926586392317507029E-02 -0.666666666666666970E-02 -0.162062560143134006E-03 0.755721765072739013E-02 -0.413717992975032974E-11 -0.164605387803321994E-03 -0.610695685352230028E-09 0.822717713417366048E-02 -0.435057920486110002E-03 0.926586013716025961E-02 -0.622874579485410002E-03 -0.435040466527716017E-03 -0.666666666666666970E-02 -0.671542885086807029E-03 0.921687788626173940E-02 -0.164358940726689012E-03 -0.666666666666666970E-02 0.822694332169499921E-02 -0.400825748326332007E-09 -0.164360402303031009E-03 -0.404387092386443002E-09 0.822717693284892929E-02 -0.666666666666666970E-02 0.755853528834142963E-02 -0.410907237024926035E-11 -0.609517161481544010E-09 0.822725835180459011E-02 0.822699835769964000E-02 -0.229769146389950011E-09 -0.666666666666666970E-02 -0.552503881834807986E-11 0.815277410257971014E-02 -0.431800076739472991E-03 -0.666666666666666970E-02 0.911683250971202959E-02 -0.645107775821699976E-03 -0.438245882118646008E-03 -0.614761191522313001E-03 0.925173111512469018E-02 -0.666666666666666970E-02 -0.162066807231452001E-03 0.755721537354540028E-02 -0.430232887169997028E-11 -0.164626562654960999E-03 -0.634707558119433995E-09 0.822717705664591063E-02 -0.435011184592201997E-03 -0.666666666666666970E-02 0.921541718621200941E-02 -0.670832821851847973E-03 0.000000000000000000E+00 -0.435087195563132003E-03 -0.622361407616723998E-03 0.926525297162917975E-02 -0.666666666666666970E-02 -0.435024023581949979E-03 0.921463310923579040E-02 -0.670757573524478033E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435074360637873977E-03 0.000000000000000000E+00 -0.622355873608441013E-03 0.926393658622583042E-02 -0.666666666666666970E-02 -0.162274431703270989E-03 0.755738308036761966E-02 -0.318241338988657979E-10 -0.165730950021739988E-03 -0.457847648587293988E-08 
0.822724281267098005E-02 0.822699408685815918E-02 -0.228425571392821005E-09 -0.666666666666666970E-02 -0.569261988092565005E-11 0.815841787771750919E-02 -0.666666666666666970E-02 -0.432079853778008993E-03 0.912452759981858923E-02 -0.647134253481845053E-03 -0.437974705900792997E-03 -0.615294574958452005E-03 0.925273449425225922E-02 -0.666666666666666970E-02 0.755849436233489019E-02 -0.410955652784976012E-11 -0.609618023537668031E-09 0.822725856237870035E-02 -0.162292777295442987E-03 -0.666666666666666970E-02 0.755742123690941042E-02 -0.382880604873281030E-10 -0.165829805992468013E-03 -0.549637808756437989E-08 0.822725133924324080E-02 -0.666666666666666970E-02 -0.162519832090693000E-03 0.758487998779587991E-02 -0.277297390858832013E-04 -0.531411093209674000E-03 -0.271306402284292975E-03 0.895289512037131946E-02 -0.666666666666666970E-02 -0.435144737424841995E-03 0.927049381409235027E-02 -0.627195150452744977E-03 0.000000000000000000E+00 -0.434953604275157982E-03 -0.673646028178072039E-03 0.921927160555094036E-02 -0.666666666666666970E-02 -0.162365832331587992E-03 0.770469465208874042E-02 -0.341374299192897006E-05 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.530093823283070008E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.670683422235064935E-04 0.878401693892019025E-02 -0.666666666666666970E-02 -0.406469352273000986E-03 0.868845376516133062E-02 -0.487445861240936023E-03 -0.459955486486156979E-03 -0.561455535102076998E-03 0.917876821200271954E-02 0.822724019224538999E-02 -0.586236000132447954E-09 -0.666666666666666970E-02 -0.399393158957938003E-11 0.756942275725619972E-02 -0.666666666666666970E-02 0.756938783599863958E-02 -0.398490214171924003E-11 -0.582276440781839991E-09 0.822717710481313952E-02 -0.666666666666666970E-02 -0.162064188320470992E-03 0.755638508915526964E-02 -0.426955665163783005E-11 -0.164621776680132990E-03 -0.630372622134120969E-09 0.822717559459638058E-02 -0.435097665935211988E-03 0.926770146162825964E-02 
-0.624846566023037008E-03 -0.435000709682981980E-03 -0.666666666666666970E-02 -0.673347124029987048E-03 0.921745137510697997E-02 -0.666666666666666970E-02 -0.164221437098695006E-03 0.822590307436798021E-02 -0.301540515380110992E-09 -0.164276463597796989E-03 -0.317267646597123000E-09 0.822720896113241992E-02 -0.666666666666666970E-02 -0.435040484785290994E-03 0.921687621618138067E-02 -0.671540621958418996E-03 -0.435057902230129995E-03 -0.622872302682172954E-03 0.926585782249826975E-02 -0.666666666666666970E-02 -0.162127341684405001E-03 0.759121275229191969E-02 -0.381774206802719039E-11 0.000000000000000000E+00 -0.164583013632061998E-03 -0.545969305880644018E-09 0.822717722380156974E-02 -0.666666666666666970E-02 -0.166319316569640000E-03 0.755721776450153995E-02 -0.125637203248046992E-10 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854313971363013E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.695766372139392979E-09 0.759121342784058965E-02 -0.435007341079486002E-03 -0.666666666666666970E-02 0.921767423501631059E-02 -0.673397510119870990E-03 -0.435091037539934999E-03 -0.624857046910582011E-03 0.926772529095947969E-02 -0.666666666666666970E-02 -0.435042780427758998E-03 0.921682237061272926E-02 -0.671414330503569993E-03 -0.435055606761638005E-03 -0.622737221386400041E-03 0.926573013279861045E-02 -0.164512437178424009E-03 0.822720912015354085E-02 -0.478141451603002007E-09 -0.162035132712254993E-03 -0.666666666666666970E-02 -0.321987108658194010E-11 0.755727832217645019E-02 -0.162065357686449010E-03 -0.666666666666666970E-02 0.755727582940845036E-02 -0.423979678728262026E-11 -0.164618725864026005E-03 -0.625584794059249953E-09 0.822717479197863921E-02 -0.162035093479698011E-03 -0.666666666666666970E-02 0.755725620859342036E-02 -0.322006454574526012E-11 -0.164512451389200987E-03 -0.478178423362356003E-09 0.822720912019037076E-02 -0.666666666666666970E-02 -0.162063731980670002E-03 0.755721533834740992E-02 
-0.418199544479783967E-11 -0.164611237647310007E-03 -0.617212851806125045E-09 0.822717573520461917E-02 -0.435072205240186985E-03 0.926779361315555050E-02 -0.624886867600672031E-03 -0.435026179501545984E-03 -0.666666666666666970E-02 -0.673540811812492046E-03 0.921830845677282934E-02 -0.435026047900845974E-03 -0.666666666666666970E-02 0.921631021319617959E-02 -0.671342130464456957E-03 -0.435072336809864014E-03 -0.622763518227145035E-03 0.926572086598697990E-02 -0.666666666666666970E-02 0.755741246850622026E-02 -0.410910644194978984E-11 -0.606515170668247041E-09 0.822717712895315936E-02 0.822711877443555936E-02 -0.222069534738638002E-09 -0.666666666666666970E-02 -0.774169806703297047E-11 0.819168186272309015E-02 -0.433633789741900023E-03 -0.666666666666666970E-02 0.917151071300196033E-02 -0.660635211420094017E-03 -0.436454594164531018E-03 -0.620192759941485053E-03 0.926043761605101039E-02 -0.666666666666666970E-02 -0.435400642028797977E-03 0.924569487162926924E-02 -0.602675681812970990E-03 -0.434697126891870982E-03 -0.641341627123363030E-03 0.919049697819149941E-02 -0.417766802147073019E-03 -0.666666666666666970E-02 0.884119022200594018E-02 -0.565196699593391045E-03 0.000000000000000000E+00 -0.450926780658108997E-03 -0.597865634330268050E-03 0.921780464690035925E-02 -0.666666666666666970E-02 -0.162927680290126010E-03 0.761083369281826978E-02 -0.416350633852817005E-04 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.530230645664085047E-03 0.000000000000000000E+00 -0.269827347276449016E-03 0.866009136742811915E-02 -0.666666666666666970E-02 -0.162144345304456006E-03 0.760092331739826033E-02 -0.320930758282650987E-05 -0.529813749405098054E-03 -0.838256824281413042E-04 0.879776792041751972E-02 0.822717682684740922E-02 -0.303187285528630003E-09 -0.666666666666666970E-02 -0.349301427021029015E-11 0.790044609360177931E-02 -0.666666666666666970E-02 -0.407938480218887980E-03 0.871084858846606080E-02 -0.512295469801526959E-03 
-0.458837927071482975E-03 -0.588085060255285046E-03 0.920289209649226975E-02 -0.666666666666666970E-02 0.755933306797651038E-02 -0.410021685751258990E-11 -0.607789044576714013E-09 0.822725774217296021E-02 -0.162167689058960993E-03 -0.666666666666666970E-02 0.755666327531443957E-02 -0.112741283804007006E-10 -0.165135297900302988E-03 -0.164049635527119992E-08 0.822716577111987958E-02 -0.666666666666666970E-02 -0.162816151525113997E-03 0.755629731334936036E-02 -0.145250995281190996E-07 -0.168361965007562991E-03 -0.190237271058338006E-05 0.822792892080568931E-02 -0.666666666666666970E-02 -0.434980646616620992E-03 0.921590961843673021E-02 -0.672201796129614040E-03 0.000000000000000000E+00 -0.435117717236683025E-03 -0.623850865689214011E-03 0.926672719210277010E-02 -0.666666666666666970E-02 -0.391662568195668987E-03 0.857592608264677939E-02 -0.270377852525331014E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.470346296009015001E-03 0.000000000000000000E+00 -0.290024881064321978E-03 0.893923985221735037E-02 -0.666666666666666970E-02 -0.317995102343517999E-03 0.807402209586039962E-02 -0.163000191627275006E-03 -0.506466182711903996E-03 -0.315285148839635984E-03 0.895741240163453953E-02 0.822721524635562987E-02 -0.564092115954154982E-09 -0.666666666666666970E-02 -0.389036326356696026E-11 0.758050134875735016E-02 -0.666666666666666970E-02 0.758050134875735016E-02 -0.389036331777706000E-11 -0.564092115791524967E-09 0.822721524635562987E-02 -0.666666666666666970E-02 -0.162059230234564013E-03 0.755676875879613980E-02 -0.404977791069632030E-11 -0.164607827252304003E-03 -0.598672124761886979E-09 0.822718791318045005E-02 -0.435030727375933994E-03 0.921782544139550068E-02 -0.671763596290095993E-03 -0.435067658311500988E-03 -0.666666666666666970E-02 -0.623720661515962042E-03 0.926678721368117027E-02 -0.666666666666666970E-02 -0.435067665337827991E-03 0.926637336566514015E-02 -0.623334437794025985E-03 -0.435030720348006980E-03 
-0.671597031953381036E-03 0.921719062426287934E-02 -0.666666666666666970E-02 -0.162064554103597002E-03 0.755721778425398963E-02 -0.421379617299167968E-11 -0.164615465305043013E-03 -0.621841414828084976E-09 0.822717725632217053E-02 -0.666666666666666970E-02 -0.435032270976548998E-03 0.921659997236582915E-02 -0.671477595984673039E-03 0.000000000000000000E+00 -0.435066114983777015E-03 -0.622858547503055051E-03 0.926582760612120956E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031835475485022E-03 0.921598351720689934E-02 -0.671332921939591038E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066550409885975E-03 0.000000000000000000E+00 -0.622784365706614980E-03 0.926515556356406061E-02 -0.435081591675335006E-03 -0.666666666666666970E-02 0.926715723864761018E-02 -0.624060476111682047E-03 -0.435016790462375985E-03 -0.671761223543174959E-03 0.921784490296891079E-02 -0.666666666666666970E-02 -0.162062411240925002E-03 0.755721810753046033E-02 -0.413154453738800000E-11 -0.164605196762570010E-03 -0.609894095762123987E-09 0.822717754580637971E-02 -0.435056188182044996E-03 0.926585891365665042E-02 -0.622867894213649997E-03 -0.435042198968283974E-03 -0.666666666666666970E-02 -0.671546446351377053E-03 0.921692872689685964E-02 -0.164341192321288998E-03 -0.666666666666666970E-02 0.822700721314235071E-02 -0.388276097998368010E-09 -0.164366485757635008E-03 -0.391285012821627015E-09 0.822719359814269927E-02 -0.435043836697356014E-03 -0.666666666666666970E-02 0.921698374849288030E-02 -0.671558867523572019E-03 -0.435054550554351985E-03 -0.622870487342790016E-03 0.926586485739499914E-02 -0.666666666666666970E-02 -0.162059863256517012E-03 0.755722860343106036E-02 -0.403706320675834994E-11 -0.164610172315551000E-03 -0.596713959736372018E-09 0.822719066745875027E-02 -0.435029178909017021E-03 0.921782856828711958E-02 -0.671763646252384965E-03 -0.435069206480010991E-03 -0.666666666666666970E-02 -0.623759080059059956E-03 0.926682896934285030E-02 
-0.165176323153183011E-03 -0.666666666666666970E-02 0.822774275548648036E-02 -0.421612183862497016E-09 -0.164358865204686000E-03 -0.403616558608118980E-09 0.822717766773221025E-02 -0.666666666666666970E-02 0.755853528843129958E-02 -0.410907228794313989E-11 -0.609517161291164049E-09 0.822725835180460052E-02 0.822699835770025062E-02 -0.229769141627132009E-09 -0.666666666666666970E-02 -0.552503936541298003E-11 0.815277412222070036E-02 -0.431800077704224978E-03 -0.666666666666666970E-02 0.911683253562737932E-02 -0.645107782251850984E-03 -0.438245881184861979E-03 -0.614761192822427965E-03 0.925173111806255020E-02 -0.666666666666666970E-02 -0.162066792004203990E-03 0.755722059760233992E-02 -0.430128633401459982E-11 -0.164626435077370987E-03 -0.634553335781814010E-09 0.822717711494816949E-02 -0.435011239879684001E-03 -0.666666666666666970E-02 0.921541899333368046E-02 -0.670833188949691958E-03 0.000000000000000000E+00 -0.435087140296697020E-03 -0.622361445147185050E-03 0.926525312625075051E-02 -0.666666666666666970E-02 -0.435024263236271974E-03 0.921464288704679918E-02 -0.670759624758249955E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.435074121043737996E-03 0.000000000000000000E+00 -0.622356277676372038E-03 0.926393943568542926E-02 -0.666666666666666970E-02 -0.162274431660521006E-03 0.755738313912261012E-02 -0.318240683403240976E-10 -0.165730949472054013E-03 -0.457846708968348003E-08 0.822724281302687072E-02 0.822699408685964931E-02 -0.228425558756176994E-09 -0.666666666666666970E-02 -0.569262164845434979E-11 0.815841793296116953E-02 -0.666666666666666970E-02 -0.432079856432105011E-03 0.912452767230425080E-02 -0.647134271290311994E-03 -0.437974703324669004E-03 -0.615294578525974042E-03 0.925273450239525071E-02 -0.666666666666666970E-02 0.755849436258760991E-02 -0.410955652506049982E-11 -0.609618023001794995E-09 0.822725856237871075E-02 -0.162292777280510000E-03 -0.666666666666666970E-02 0.755742125771192031E-02 
-0.382880319923928003E-10 -0.165829805796784997E-03 -0.549637401523144008E-08 0.822725133937288015E-02 -0.666666666666666970E-02 -0.162519791749040997E-03 0.758486654085705012E-02 -0.277312982546554016E-04 -0.531411130485370960E-03 -0.271318851749606982E-03 0.895290555306309938E-02 -0.666666666666666970E-02 -0.435144726093062025E-03 0.927049355419245005E-02 -0.627194917416272046E-03 0.000000000000000000E+00 -0.434953615617772005E-03 -0.673646073339009962E-03 0.921927163146945942E-02 -0.666666666666666970E-02 -0.162365825870953989E-03 0.770469126010004009E-02 -0.341374404077266992E-05 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.530093736436846978E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.670688877278153049E-04 0.878401724365305073E-02 -0.666666666666666970E-02 -0.406469238190150994E-03 0.868845253580060976E-02 -0.487445397507600979E-03 -0.459955572937245996E-03 -0.561455436984956992E-03 0.917876806761750916E-02 0.822724019224498060E-02 -0.586236017994280044E-09 -0.666666666666666970E-02 -0.399393167827312004E-11 0.756942274805354979E-02 -0.666666666666666970E-02 0.756938782679772958E-02 -0.398490217607864004E-11 -0.582276458527030000E-09 0.822717710481316034E-02 -0.666666666666666970E-02 -0.162064188279850002E-03 0.755638507164865976E-02 -0.426955655883516001E-11 -0.164621776652662993E-03 -0.630372617736862023E-09 0.822717559470338006E-02 -0.435097665884428989E-03 0.926770143129381982E-02 -0.624846533095091048E-03 -0.435000709733791000E-03 -0.666666666666666970E-02 -0.673347090449469992E-03 0.921745134737769929E-02 -0.666666666666666970E-02 -0.164221437102121003E-03 0.822590307607582068E-02 -0.301540531002169025E-09 -0.164276463596261000E-03 -0.317267642602591980E-09 0.822720896113589978E-02 -0.666666666666666970E-02 -0.435040484785382989E-03 0.921687621618049943E-02 -0.671540621954609001E-03 -0.435057902230038000E-03 -0.622872302677949011E-03 0.926585782249450020E-02 -0.666666666666666970E-02 
-0.162127341684184989E-03 0.759121275229850991E-02 -0.381774206063630980E-11 0.000000000000000000E+00 -0.164583013631449993E-03 -0.545969304848883959E-09 0.822717722380189066E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319316568878998E-03 0.755721776450365024E-02 -0.125637200248759998E-10 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854313970594991E-03 0.000000000000000000E+00 -0.695766357075677050E-09 0.759121342784747997E-02 -0.435007341721630001E-03 -0.666666666666666970E-02 0.921767422715922928E-02 -0.673397481031394029E-03 -0.435091036898059991E-03 -0.624857014917886951E-03 0.926772526275846005E-02 -0.666666666666666970E-02 -0.435042780430306005E-03 0.921682237057486024E-02 -0.671414330383147007E-03 -0.435055606759090997E-03 -0.622737221255020975E-03 0.926573013268000914E-02 -0.164512437176590000E-03 0.822720912015702070E-02 -0.478141445901109973E-09 -0.162035132711022011E-03 -0.666666666666666970E-02 -0.321987104749231018E-11 0.755727832223314962E-02 -0.162065357632703997E-03 -0.666666666666666970E-02 0.755727582947162985E-02 -0.423979469854183965E-11 -0.164618725596388991E-03 -0.625584490453962995E-09 0.822717479206993944E-02 -0.162035093478465002E-03 -0.666666666666666970E-02 0.755725620865007990E-02 -0.322006453375858990E-11 -0.164512451387367005E-03 -0.478178417741383017E-09 0.822720912019385062E-02 -0.666666666666666970E-02 -0.162063731945476990E-03 0.755721533840931960E-02 -0.418199409455997034E-11 -0.164611237471814999E-03 -0.617212655483738035E-09 0.822717573526372987E-02 -0.435072204818992981E-03 0.926779358414435948E-02 -0.624886835257608040E-03 -0.435026179922837024E-03 -0.666666666666666970E-02 -0.673540781035909049E-03 0.921830844145678924E-02 -0.435026048326556981E-03 -0.666666666666666970E-02 0.921631022735146070E-02 -0.671342133550576967E-03 -0.435072336384253024E-03 -0.622763518765494998E-03 0.926572086739899971E-02 -0.666666666666666970E-02 0.755741246850622026E-02 -0.410910644194978984E-11 
-0.606515170668247041E-09 0.822717712895315936E-02 0.822711877443555936E-02 -0.222069534792841002E-09 -0.666666666666666970E-02 -0.774169806704296934E-11 0.819168186272316995E-02 -0.433633789741903004E-03 -0.666666666666666970E-02 0.917151071300205921E-02 -0.660635211420117002E-03 -0.436454594164527982E-03 -0.620192759941490040E-03 0.926043761605102080E-02 -0.666666666666666970E-02 -0.435400642045188024E-03 0.924569487205297025E-02 -0.602675682168614998E-03 -0.434697126875424015E-03 -0.641341627119159036E-03 0.919049697820984064E-02 -0.417766802147158021E-03 -0.666666666666666970E-02 0.884119022200715969E-02 -0.565196699593685948E-03 0.000000000000000000E+00 -0.450926780658037006E-03 -0.597865634330208961E-03 0.921780464690040956E-02 -0.666666666666666970E-02 -0.162927680290140999E-03 0.761083369282181989E-02 -0.416350633850501013E-04 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.530230645664087974E-03 0.000000000000000000E+00 -0.269827347274097977E-03 0.866009136742689964E-02 -0.666666666666666970E-02 -0.162144345304457009E-03 0.760092331739868968E-02 -0.320930758282241997E-05 -0.529813749405099029E-03 -0.838256824279560954E-04 0.879776792041734972E-02 0.822717682684740922E-02 -0.303187285555688993E-09 -0.666666666666666970E-02 -0.349301427021062013E-11 0.790044609360190074E-02 -0.666666666666666970E-02 -0.407938480218909014E-03 0.871084858846631060E-02 -0.512295469801619008E-03 -0.458837927071466983E-03 -0.588085060255303044E-03 0.920289209649229924E-02 -0.666666666666666970E-02 0.755933306797651038E-02 -0.410021674909242031E-11 -0.607789044576720010E-09 0.822725774217296021E-02 -0.162167689058960993E-03 -0.666666666666666970E-02 0.755666327531452024E-02 -0.112741284887906999E-10 -0.165135297900302012E-03 -0.164049635526676995E-08 0.822716577111987958E-02 -0.666666666666666970E-02 -0.162064381314455013E-03 0.755721681642354016E-02 -0.420708647347467005E-11 -0.164614463272000995E-03 -0.620861048269696965E-09 
0.822717666428922009E-02 -0.666666666666666970E-02 -0.435032929063819981E-03 0.921662843024339054E-02 -0.671487062718633038E-03 0.000000000000000000E+00 -0.435065457005957975E-03 -0.622863607135336990E-03 0.926583682639911012E-02 -0.666666666666666970E-02 -0.435033606033594978E-03 0.921549100307902068E-02 -0.670764254703717011E-03 0.000000000000000000E+00 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435064780144060010E-03 0.000000000000000000E+00 -0.622228101134277012E-03 0.926458636134656031E-02 -0.666666666666666970E-02 -0.162412248298241007E-03 0.755747465195114043E-02 -0.132892288474876992E-09 -0.166453306564733994E-03 -0.187762010463222004E-07 0.822729692880195054E-02 0.822701112868542972E-02 -0.238165066447310994E-09 -0.666666666666666970E-02 -0.481872285692390998E-11 0.811743485472943976E-02 -0.666666666666666970E-02 0.907030660461941030E-02 -0.635289343114582030E-03 -0.614711342014916053E-03 0.924712095234279940E-02 -0.666666666666666970E-02 -0.162067491346243997E-03 0.755721660010066996E-02 -0.433042344878489997E-11 -0.164637989129536011E-03 -0.639067222491722952E-09 0.822718313255254956E-02 -0.435123629441636984E-03 0.926610912903469072E-02 -0.623064810739766984E-03 -0.434974730176162019E-03 -0.666666666666666970E-02 -0.671125339170808039E-03 0.921519637816755992E-02 -0.666666666666666970E-02 -0.166000837408311988E-03 0.822475961254992040E-02 -0.105963041200035007E-07 -0.166020054886163010E-03 -0.114023574905160996E-07 0.822673401751872028E-02 -0.666666666666666970E-02 -0.435040411541543013E-03 0.921687888628672063E-02 -0.671545404567263976E-03 -0.435057975467457981E-03 -0.622877304336513001E-03 0.926586291132954973E-02 -0.666666666666666970E-02 -0.162130707049625006E-03 0.759120585643323006E-02 -0.392634071196178009E-11 0.000000000000000000E+00 -0.164597967653741013E-03 -0.561253610552618950E-09 0.822717507317871979E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319143409792997E-03 0.755721565731708005E-02 -0.124778474586887002E-10 
-0.666666666666666970E-02 0.000000000000000000E+00 -0.166854049995611003E-03 0.000000000000000000E+00 -0.691000750970318969E-09 0.759120679480445974E-02 -0.435051311183006005E-03 -0.666666666666666970E-02 0.926639102794180047E-02 -0.623356751569072967E-03 -0.435047076190057990E-03 -0.671803857406816016E-03 0.921783849635480004E-02 -0.666666666666666970E-02 -0.435004887685989992E-03 0.921760501413688982E-02 -0.673382235862111022E-03 -0.435093489874442013E-03 -0.624855064858449022E-03 0.926773123149998018E-02 -0.166320189393602005E-03 0.822674016833681992E-02 -0.176036669988993003E-07 -0.162405090688964003E-03 -0.666666666666666970E-02 -0.125356963990057004E-09 0.755633079584900041E-02 -0.162059610339947000E-03 -0.666666666666666970E-02 0.755636445209539962E-02 -0.409725503580782012E-11 -0.164619621806747006E-03 -0.605978989258597974E-09 0.822719229488561021E-02 -0.162405940044832003E-03 -0.666666666666666970E-02 0.755662367990892966E-02 -0.125254866802548012E-09 -0.166319952568922996E-03 -0.175849783175382999E-07 0.822674014969775047E-02 -0.666666666666666970E-02 -0.162059866198233001E-03 0.755722656090779960E-02 -0.403692298108515966E-11 -0.164606538526818004E-03 -0.596577171893270041E-09 0.822718795529438045E-02 -0.435028073525391026E-03 0.921786254908986934E-02 -0.671799805131304017E-03 -0.435070311635940977E-03 -0.666666666666666970E-02 -0.623819803880377045E-03 0.926689343860783929E-02 -0.435070247954426976E-03 -0.666666666666666970E-02 0.926779304085995070E-02 -0.624694668192695042E-03 -0.435028137220242991E-03 -0.672979588001651041E-03 0.921846396898064017E-02 -0.666666666666666970E-02 0.894946699024212929E-02 -0.600122097862237002E-03 -0.606052261066108988E-03 0.923126173183646918E-02 0.822707325920852932E-02 -0.439334748407924013E-09 -0.666666666666666970E-02 -0.340850404729477990E-11 0.767218067483432017E-02 -0.163197277611309987E-03 -0.666666666666666970E-02 0.767185136575801018E-02 -0.332821151740468006E-08 -0.167963560070563987E-03 
-0.390028261492079976E-06 0.822779264793695002E-02 -0.666666666666666970E-02 -0.424429129575981982E-03 0.884286864444589968E-02 -0.396957256272790994E-03 -0.445115815542256987E-03 -0.385983334887198980E-03 0.902131707790924942E-02 -0.407763611263302025E-03 -0.666666666666666970E-02 0.870515217458626978E-02 -0.498571990041084007E-03 0.000000000000000000E+00 -0.458970290280038998E-03 -0.570346192985176997E-03 0.918907244525967067E-02 -0.666666666666666970E-02 -0.322071566303724021E-03 0.801064459323599064E-02 -0.151974252254838009E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.504268248287133031E-03 0.000000000000000000E+00 -0.280047589106463980E-03 0.858275366599436061E-02 -0.666666666666666970E-02 -0.162136502882231009E-03 0.755886084998397007E-02 -0.797239446559356004E-11 -0.165003120614180988E-03 -0.116699722701056003E-08 0.822721467884345026E-02 0.926072857339886937E-02 -0.621240348529511033E-03 -0.666666666666666970E-02 -0.664187000217392946E-03 0.918478778839808968E-02 -0.666666666666666970E-02 -0.434091712912973990E-03 0.918381835623783074E-02 -0.661965111513909037E-03 -0.436002089949920021E-03 -0.618955328845967952E-03 0.925997826239693955E-02 -0.666666666666666970E-02 0.755828330701808961E-02 -0.411187050494477969E-11 -0.610057942551154034E-09 0.822725843300332015E-02 -0.162643320272068002E-03 -0.666666666666666970E-02 0.760168784361417004E-02 -0.350264221860245974E-04 -0.531763987837332967E-03 -0.285598197078351006E-03 0.896123574808329985E-02 -0.666666666666666970E-02 -0.162064603067110991E-03 0.755721765251989031E-02 -0.421568927958496012E-11 0.000000000000000000E+00 -0.164615562653504011E-03 -0.622111769806188027E-09 0.822717714448987017E-02 -0.666666666666666970E-02 -0.435031767299973019E-03 0.921658397588867064E-02 -0.671474829222033949E-03 0.000000000000000000E+00 -0.435066618573651026E-03 -0.622858771889590011E-03 0.926582672216797083E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435031759821466978E-03 
0.921598447351964015E-02 -0.671353608292523957E-03 -0.666666666666666970E-02 0.000000000000000000E+00 -0.435066626050711989E-03 0.000000000000000000E+00 -0.622806883477106956E-03 0.926515741405328948E-02 -0.666666666666666970E-02 -0.162062535484502993E-03 0.755721765241602027E-02 -0.413624146533982998E-11 -0.164605264966013012E-03 -0.610559194090089995E-09 0.822717713298093921E-02 0.822717692676106990E-02 -0.401916224683843019E-09 -0.666666666666666970E-02 -0.398503994321098006E-09 0.822695176184735054E-02 -0.666666666666666970E-02 0.921688643859516939E-02 -0.671543140262901043E-03 -0.622873028246735054E-03 0.926585914257952933E-02 -0.666666666666666970E-02 -0.162062560104490004E-03 0.755721765317601043E-02 -0.413717829056970014E-11 -0.164605387585608994E-03 -0.610695437742429953E-09 0.822717713403002017E-02 -0.435057919911320979E-03 0.926585996691042975E-02 -0.622874416570012033E-03 -0.435040467102555022E-03 -0.666666666666666970E-02 -0.671542718462655053E-03 0.921687774051401995E-02 -0.666666666666666970E-02 -0.164358956864967009E-03 0.822694333429544941E-02 -0.400838058665778016E-09 -0.164360413187303008E-03 -0.404399206698849991E-09 0.822717692928500062E-02 -0.666666666666666970E-02 -0.435040480198734978E-03 0.921687629883806946E-02 -0.671540846988837015E-03 0.000000000000000000E+00 -0.435057906816285018E-03 -0.622872546234125974E-03 0.926585805175031071E-02 -0.666666666666666970E-02 -0.162127546305523010E-03 0.759121249965894005E-02 -0.382423911661819962E-11 0.000000000000000000E+00 -0.164583726826487010E-03 -0.546878146458841976E-09 0.822717706956129988E-02 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166319285296618989E-03 0.755721766130180994E-02 -0.125511345494907995E-10 -0.666666666666666970E-02 0.000000000000000000E+00 -0.166854277186167003E-03 0.000000000000000000E+00 -0.695070062425207962E-09 0.759121319044449988E-02 -0.435040455888697000E-03 -0.666666666666666970E-02 0.921687736420685050E-02 -0.671542633493042978E-03 -0.435057931124196984E-03 
-0.622874398846488971E-03 0.926585992663164944E-02 -0.666666666666666970E-02 -0.435040480266267004E-03 0.921687629128858064E-02 -0.671540837058780997E-03 -0.435057906748759010E-03 -0.622872536301050019E-03 0.926585804176803009E-02 -0.164605382599736005E-03 0.822717713315264040E-02 -0.610689878464067020E-09 -0.162062559112368987E-03 -0.666666666666666970E-02 -0.413714002853130017E-11 0.755721765707715026E-02 -0.162062560979487995E-03 -0.666666666666666970E-02 0.755721765716044040E-02 -0.413721119467013017E-11 -0.164605391905931007E-03 -0.610700236970853951E-09 0.822717713430983974E-02 -0.162062559104255008E-03 -0.666666666666666970E-02 0.755721765270791958E-02 -0.413714010482357039E-11 -0.164605382602643998E-03 -0.610689887899547005E-09 0.822717713315264040E-02 -0.666666666666666970E-02 -0.162062002688690993E-03 0.755721765325861970E-02 -0.411602396034761983E-11 -0.164602611349319008E-03 -0.607618836657478970E-09 0.822717713082904076E-02 -0.435050795545137013E-03 0.926588559200255957E-02 -0.622885679652955006E-03 -0.435047591837533988E-03 -0.666666666666666970E-02 -0.671596715586648962E-03 0.921711691727978080E-02 -0.435047591829257026E-03 -0.666666666666666970E-02 0.921711503527408021E-02 -0.671594744907884041E-03 -0.435050795553413975E-03 -0.622883788869598950E-03 0.926588363014568990E-02 -0.666666666666666970E-02 0.921687630349790968E-02 -0.671540851815855032E-03 -0.622872550856051971E-03 0.926585805656933957E-02 0.822717712936564018E-02 -0.606929320764278968E-09 -0.666666666666666970E-02 -0.411128314877385017E-11 0.755721765699138986E-02 -0.162062559115238003E-03 -0.666666666666666970E-02 0.755721765701833012E-02 -0.413714010896968962E-11 -0.164605382614328987E-03 -0.610689894184595994E-09 0.822717713311110939E-02 -0.666666666666666970E-02 -0.435040480219567002E-03 0.921687629951207024E-02 -0.671540847113225037E-03 -0.435057906795455001E-03 -0.622872546234027962E-03 0.926585805179963064E-02 -0.162127546311748987E-03 -0.666666666666666970E-02 
0.759121249955777965E-02 -0.382423940240163967E-11 0.000000000000000000E+00 -0.164583726855365006E-03 -0.546878175305287047E-09 0.822717706955716950E-02 -0.666666666666666970E-02 -0.166319285308019998E-03 0.755721766131254007E-02 -0.125511384896241995E-10 0.000000000000000000E+00 0.000000000000000000E+00 -0.666666666666666970E-02 -0.166854277196930989E-03 0.000000000000000000E+00 -0.695070273739865966E-09 0.759121319033985009E-02 -0.666666666666666970E-02 -0.162062001821340992E-03 0.755721765238263031E-02 -0.411599097514774006E-11 -0.164602607032878005E-03 -0.607614054874119969E-09 0.822717713004762069E-02 0.926588367541618038E-02 -0.622883811249193005E-03 -0.666666666666666970E-02 -0.671594835876209036E-03 0.921711542242288975E-02 -0.666666666666666970E-02 -0.435047603237387977E-03 0.921711541881104976E-02 -0.671594832109726034E-03 -0.435050784145465008E-03 -0.622883807637388031E-03 0.926588367164974010E-02 -0.666666666666666970E-02 0.755721765237517013E-02 -0.411128320041531016E-11 -0.606929330558691026E-09 0.822717712936565058E-02 -0.435040480193840022E-03 -0.666666666666666970E-02 0.921687628881386924E-02 -0.671540836666198004E-03 -0.435057906821179974E-03 -0.622872536362236969E-03 0.926585804144416936E-02 -0.166666666666667011E-01 -0.435069629630581997E-03 0.194633698669611013E-01 -0.118122796424918004E-02 -0.435069154494976983E-03 -0.125613726459116001E-02 0.193884422615678009E-01 -0.166666666666667011E-01 -0.139671085486954007E-03 0.172964744640674989E-01 -0.239732978890832008E-07 -0.141641486800695994E-03 -0.121305359387927990E-05 0.178147059436198005E-01 -0.141641814770074998E-03 0.178147059816575991E-01 -0.121308374397845994E-05 -0.166666666666667011E-01 -0.139671142305139987E-03 -0.239739107040978989E-07 0.172964744651716990E-01 -0.166666666666667011E-01 -0.435068671068379023E-03 0.000000000000000000E+00 0.193848780140391010E-01 -0.125250859257347992E-02 -0.435070113055705985E-03 -0.117767774816952006E-02 0.194597410164681003E-01 
-0.166666666666667011E-01 0.172969944943952014E-01 -0.779115312058558027E-07 -0.166666666666667011E-01 -0.755324935173564974E-05 0.179004996941746000E-01 -0.166666666666667011E-01 0.178976999748920000E-01 -0.387667548978359018E-05 -0.233628102655817991E-06 0.178135930268625009E-01 -0.166666666666667011E-01 -0.435068785870240020E-03 0.193888089807052987E-01 -0.125652619301678007E-02 -0.435069998254330982E-03 -0.118160505927794005E-02 0.194637572604987007E-01 -0.435033736890810016E-03 -0.166666666666667011E-01 0.193943160872508992E-01 -0.126292016425918010E-02 0.000000000000000000E+00 -0.435105043203752020E-03 -0.118838524744318999E-02 0.194704567784157005E-01 -0.166666666666667011E-01 -0.139670810780983011E-03 0.000000000000000000E+00 0.172964393749100998E-01 -0.239726372267789987E-07 -0.141640262301834013E-03 -0.121310025861443004E-05 0.178147059964809014E-01 -0.166666666666667011E-01 -0.435069265953867983E-03 0.193887440338862994E-01 -0.125644950186007995E-02 -0.435069518171818988E-03 -0.118152293796727997E-02 0.194636762447145013E-01 -0.140348173477511997E-03 0.178144318724902002E-01 -0.933813955352833025E-06 -0.166666666666667011E-01 -0.140348162575281002E-03 -0.933775488811803979E-06 0.178144292153939003E-01 -0.166666666666667011E-01 -0.139703533487054989E-03 0.172964344581288988E-01 -0.243260553268771015E-07 -0.141829183573385009E-03 -0.123048280177751006E-05 0.178147222203411011E-01 -0.435051982399214975E-03 -0.166666666666667011E-01 0.193933724724300004E-01 -0.126258834031118010E-02 -0.435086800763898984E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118819422876379008E-02 0.194685490108550997E-01 0.000000000000000000E+00 -0.435025946733501019E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.193872403842459996E-01 -0.125578622091835007E-02 -0.435112831393176015E-03 -0.118149517659288990E-02 0.194634742330271997E-01 -0.166666666666667011E-01 0.172964350463347995E-01 -0.239721729446109010E-07 
-0.121308719330683991E-05 0.178147059846293990E-01 -0.166666666666667011E-01 -0.435069341371819001E-03 0.193887901446231983E-01 -0.125649495627712989E-02 -0.435069442753911013E-03 -0.118156624050173007E-02 0.194637211303591989E-01 -0.166666666666667011E-01 0.172964137870020990E-01 -0.947281877378979960E-08 0.000000000000000000E+00 -0.166666666666667011E-01 -0.426824290648908980E-06 0.177495617930084996E-01 0.000000000000000000E+00 -0.140229676531456008E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.177491875365273014E-01 -0.500657647124818999E-07 -0.140479994479306001E-03 -0.522090189834258032E-06 0.178140176711019983E-01 -0.435033290824524978E-03 -0.166666666666667011E-01 0.193943004566385011E-01 -0.126291331604961991E-02 -0.435105489168504994E-03 -0.118838501644233006E-02 0.194704545970095996E-01 -0.435268249897141982E-03 -0.166666666666667011E-01 0.194803294031692005E-01 -0.120221696823341996E-02 -0.434870408625407019E-03 -0.126728096347563995E-02 0.194000952173805016E-01 -0.166666666666667011E-01 0.172950455057520984E-01 -0.209695222919133997E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.104309476531645996E-05 0.178040398390986003E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.173086281050639987E-01 -0.234055546331482985E-07 -0.116921175397781010E-05 0.178149613334784990E-01 -0.166666666666667011E-01 -0.141165505762467993E-03 0.172962556489167016E-01 -0.473446914579739040E-05 -0.544658362791339019E-03 -0.346186312977599982E-04 0.183333438873188002E-01 -0.139754712611172996E-03 -0.166666666666667011E-01 0.176170424650436003E-01 -0.246162966049732998E-07 -0.140206415574070008E-03 -0.567757815085220049E-06 0.178140588186939987E-01 -0.165095245016433009E-03 0.178223322459852007E-01 -0.681146904710235036E-05 -0.166666666666667011E-01 -0.150451686795303991E-03 -0.294774223703715983E-06 0.176167018986788000E-01 -0.166666666666667011E-01 -0.421169644089434015E-03 0.189193940368806990E-01 -0.945437876210229950E-03 
-0.448291096827642985E-03 -0.100548436568255006E-02 0.192537240725896008E-01 -0.166666666666667011E-01 0.173199119819372992E-01 -0.235984471741585014E-07 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.123676172594726990E-05 0.178172740172132006E-01 -0.166666666666667011E-01 -0.435171428458813985E-03 0.194709389718316986E-01 -0.119092785784032009E-02 -0.434967322589985023E-03 -0.126074169121839991E-02 0.193933111634515994E-01 -0.166666666666667011E-01 0.172958818583838006E-01 -0.240054389113918998E-07 -0.121597095988231006E-05 0.178147088549027999E-01 -0.434490077538828977E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.192819870996423989E-01 -0.115810409218265005E-02 -0.435647599097352997E-03 -0.109302488191087002E-02 0.193702791631770004E-01 -0.166666666666667011E-01 0.172964231611969985E-01 -0.237439154341176999E-07 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.119986668002372008E-05 0.178139470404297014E-01 -0.166666666666667011E-01 -0.435069730380088988E-03 0.194633683526404012E-01 -0.118122901996727003E-02 -0.435069053745286004E-03 -0.125613277918751993E-02 0.193884385258774015E-01 -0.166666666666667011E-01 -0.139670526915550006E-03 0.172964750459083001E-01 -0.239672729870900994E-07 -0.141638257586788001E-03 -0.121275616494760993E-05 0.178147056890299998E-01 -0.141820432945145006E-03 0.178147229316946987E-01 -0.122951654344512007E-05 -0.166666666666667011E-01 -0.139702093156581013E-03 -0.243082078767443003E-07 0.172964754242462999E-01 -0.166666666666667011E-01 -0.435009064920768992E-03 0.193710715232028007E-01 -0.123960904534558994E-02 -0.435129707573877990E-03 -0.116591990546044996E-02 0.194474141497074995E-01 -0.166666666666667011E-01 0.172985028689811998E-01 -0.223410986359397003E-07 -0.166666666666667011E-01 -0.111564513924942993E-05 0.178095672953987004E-01 -0.166666666666667011E-01 0.193790139692226991E-01 -0.125224418475917997E-02 
-0.118134652118899995E-02 0.194616264856153007E-01 -0.166666666666667011E-01 -0.435069813890499976E-03 0.194637397133314016E-01 -0.118159504407725001E-02 -0.435068970234672974E-03 -0.125650151292971989E-02 0.193888007309931006E-01 -0.435063659072025002E-03 -0.166666666666667011E-01 0.193957932538242998E-01 -0.126373923029710002E-02 0.000000000000000000E+00 -0.435075124949578019E-03 -0.118877268529867007E-02 0.194710559190932990E-01 -0.166666666666667011E-01 -0.139670655296777991E-03 0.172964394256112999E-01 -0.239709674501792008E-07 -0.141639364432354998E-03 -0.121301807421782997E-05 0.178147059272172996E-01 -0.166666666666667011E-01 -0.435069551479130975E-03 0.194636473054102983E-01 -0.118149814806462004E-02 -0.435069232646526994E-03 -0.125641682842023996E-02 0.193887163382647013E-01 -0.140347813456026012E-03 0.178144318351523001E-01 -0.933769427979961972E-06 -0.166666666666667011E-01 -0.140367975523149991E-03 -0.933880353406906957E-06 0.178144352914434007E-01 -0.166666666666667011E-01 -0.139694549084838003E-03 0.172964390046605986E-01 -0.242291801426243003E-07 -0.141777300691703004E-03 -0.122571812837964004E-05 0.178147201764872996E-01 -0.435047850309519990E-03 -0.166666666666667011E-01 0.193943737538610009E-01 -0.126316205126679004E-02 0.000000000000000000E+00 -0.435090932343969020E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118860422039771007E-02 0.194699012602338010E-01 0.000000000000000000E+00 -0.435045457437436027E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193878817571609997E-01 -0.125604652907840004E-02 -0.435093324868215015E-03 -0.118146947412013995E-02 0.194635297377060007E-01 -0.139670603133990989E-03 -0.166666666666667011E-01 0.172964396533356012E-01 -0.239704033946370987E-07 -0.141639061330424995E-03 -0.121298992344433003E-05 0.178147059349155999E-01 -0.166666666666667011E-01 -0.435069549163963982E-03 0.194637077506569015E-01 -0.118155678661577995E-02 -0.435069234961695992E-03 
-0.125647743137780000E-02 0.193887749967509987E-01 -0.139700055968098002E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964161546672983E-01 -0.947619561016756952E-08 -0.141286732534576009E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.426994435274359988E-06 0.177495882894109988E-01 0.000000000000000000E+00 -0.140193998721740011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177492151422632000E-01 -0.498003721032979000E-07 -0.140435023641922004E-03 -0.519120801586516000E-06 0.178140167474564008E-01 -0.435057327600382997E-03 -0.166666666666667011E-01 0.193955726939472016E-01 -0.126364253425448000E-02 -0.435081456064144001E-03 -0.118875145877563992E-02 0.194710320647638012E-01 -0.434101287475744974E-03 -0.166666666666667011E-01 0.193457297954395986E-01 -0.123123427263146005E-02 -0.436034461490277003E-03 -0.117022370416927993E-02 0.194484060865461987E-01 -0.166666666666667011E-01 0.172978496798759998E-01 -0.927299841537387054E-08 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.415800227862941001E-06 0.177484703644168011E-01 -0.166666666666667011E-01 0.193674050743437992E-01 -0.124713171719898993E-02 -0.118115734794936003E-02 0.194600874520639011E-01 -0.166666666666667011E-01 -0.141885046454967004E-03 0.173019565602799998E-01 -0.104149302119304992E-04 -0.548756258214203995E-03 -0.211535543706460008E-03 0.185091585802883984E-01 -0.140739916525857997E-03 -0.166666666666667011E-01 0.175467171312232995E-01 -0.284331238335905014E-07 -0.142759739737579006E-03 -0.795488176607448023E-06 0.178133916912854989E-01 -0.159214075622715010E-03 0.178183546415562992E-01 -0.506772449993064000E-05 -0.166666666666667011E-01 -0.147688204700038996E-03 -0.193011674962318992E-06 0.175471635134074010E-01 -0.166666666666667011E-01 -0.390248902500896981E-03 0.183597810468748013E-01 -0.389827888875239994E-03 -0.472748684898519024E-03 -0.455376277614669004E-03 0.187227106527687003E-01 -0.166666666666667011E-01 0.184527954209229988E-01 
-0.738253265195438005E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.918001551148692977E-03 0.188197523488804995E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.405995886601362996E-03 0.000000000000000000E+00 0.187454059199008011E-01 -0.889550329385700050E-03 -0.461282908632618994E-03 -0.106896425909096997E-02 0.192970827972911013E-01 -0.166666666666667011E-01 0.173071968997474991E-01 -0.234787153019127012E-07 -0.117638457460658001E-05 0.178149781133057003E-01 -0.141084828730741996E-03 -0.166666666666667011E-01 0.177480973766994017E-01 -0.558748334339287969E-07 -0.141568067840571008E-03 -0.593631384362742965E-06 0.178140924667235002E-01 -0.139670647362560011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964277750680996E-01 -0.239358302523397992E-07 -0.141638396626583005E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.121100320738714998E-05 0.178145893661829993E-01 -0.166666666666667011E-01 -0.139670792138544994E-03 0.172964364510580990E-01 -0.239726277356469989E-07 -0.141640184766718993E-03 -0.121310638575149002E-05 0.178147060027850010E-01 -0.166666666666667011E-01 -0.140348083609600002E-03 0.178144298829951003E-01 -0.933774429165531998E-06 -0.140348091728343988E-03 -0.933803076573281956E-06 0.178144318618429984E-01 -0.435069485970576990E-03 0.194636360494852001E-01 -0.118148312544618999E-02 -0.166666666666667011E-01 -0.435069298155133020E-03 -0.125640935261609995E-02 0.193887056174587014E-01 -0.166666666666667011E-01 -0.139670745411716994E-03 0.172964393771044000E-01 -0.239719351623946988E-07 -0.141639884975219995E-03 -0.121306573571080992E-05 0.178147059632727985E-01 -0.166666666666667011E-01 0.193887976520876991E-01 -0.125650164472155992E-02 -0.166666666666667011E-01 -0.118157208029700004E-02 0.194637273004373001E-01 -0.166666666666667011E-01 0.193887977203392008E-01 -0.125650167109372002E-02 -0.118157208884185996E-02 0.194637273863749009E-01 -0.166666666666667011E-01 
-0.435069389748935983E-03 0.193887814175709994E-01 -0.125648501071033009E-02 -0.435069394376801999E-03 -0.118155580222921008E-02 0.194637107296697989E-01 -0.140178146762424992E-03 -0.166666666666667011E-01 0.177492251547946012E-01 -0.496800708557238970E-07 0.000000000000000000E+00 -0.140415043495099005E-03 -0.517779583567776022E-06 0.178140139542000003E-01 -0.166666666666667011E-01 -0.435069391170850978E-03 0.193884418595630000E-01 -0.125613777078238003E-02 -0.435069392954887005E-03 -0.118121619467206006E-02 0.194633634756040015E-01 -0.166666666666667011E-01 -0.139670761471267005E-03 0.172964743992243992E-01 -0.239698239143705995E-07 -0.141639617585322011E-03 -0.121288295374220008E-05 0.178147057813429002E-01 -0.141639619900249991E-03 0.178147057815410993E-01 -0.121288316535549008E-05 -0.166666666666667011E-01 -0.139670761872411997E-03 -0.239698282191903987E-07 0.172964743992280005E-01 -0.166666666666667011E-01 -0.435069391157123027E-03 0.193887088952683015E-01 -0.125641082509626010E-02 -0.435069392968615010E-03 -0.118148323088716994E-02 0.194636365300335998E-01 -0.139670783971901987E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964393406603995E-01 -0.239438477767506995E-07 -0.141639278555754001E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121143526565240011E-05 0.178146131748509011E-01 0.000000000000000000E+00 -0.435064994003248019E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885566142219003E-01 -0.125634391561759004E-02 -0.435073790061057985E-03 -0.118148087424704991E-02 0.194636165590847988E-01 -0.139670564073325994E-03 -0.166666666666667011E-01 0.172964393993381010E-01 -0.239699852667253984E-07 -0.141638838042723001E-03 -0.121296980432410002E-05 0.178147058666821984E-01 -0.166666666666667011E-01 -0.435069390409400993E-03 0.193887087203693000E-01 -0.125641066243104005E-02 -0.435069393716336990E-03 -0.118148308295969000E-02 0.194636363738771995E-01 -0.139670603963582999E-03 -0.166666666666667011E-01 
0.172964393580567997E-01 -0.239419221487562986E-07 -0.141638239923676007E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121134061697532001E-05 0.178146131028351987E-01 0.000000000000000000E+00 -0.435064995135525016E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885567877557993E-01 -0.125634406896881997E-02 -0.435073788928811995E-03 -0.118148100749493009E-02 0.194636167020634000E-01 -0.139670744060081990E-03 -0.166666666666667011E-01 0.172964393838881993E-01 -0.239719201961261003E-07 -0.141639877103680014E-03 -0.121306498448360998E-05 0.178147059625157998E-01 -0.435069391187604992E-03 -0.166666666666667011E-01 0.193887118630177990E-01 -0.125641385907748998E-02 -0.435069392938132991E-03 -0.118148619756011002E-02 0.194636395637266015E-01 -0.166666666666667011E-01 0.172965077542917012E-01 -0.927568247376278936E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.102046200973755008E-05 0.173051339160060005E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887977549710017E-01 -0.125650168627743997E-02 -0.118157208932238010E-02 0.194637273908665996E-01 -0.166666666666667011E-01 -0.435069390872567011E-03 0.193884418484903988E-01 -0.125613776548786000E-02 -0.435069393253171026E-03 -0.118121619377422010E-02 0.194633634734872989E-01 -0.139670761455125999E-03 -0.166666666666667011E-01 0.172964743993230009E-01 -0.239698237345604007E-07 -0.141639617491158998E-03 -0.121288294467888995E-05 0.178147057813337999E-01 -0.141639617495969007E-03 0.178147057813342995E-01 -0.121288294511917005E-05 -0.166666666666667011E-01 -0.139670761455959995E-03 -0.239698237435694994E-07 0.172964743993230009E-01 -0.166666666666667011E-01 -0.435069391186033983E-03 0.193887118629635993E-01 -0.125641385905383009E-02 -0.435069392939704000E-03 -0.118148619755951002E-02 0.194636395637197007E-01 -0.166666666666667011E-01 0.172965077542917012E-01 -0.927568247375443947E-07 0.000000000000000000E+00 -0.166666666666667011E-01 
-0.102046200973782007E-05 0.173051339160060005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.139674680709096002E-03 0.000000000000000000E+00 0.173041376249854009E-01 -0.235410513736254011E-07 -0.141584640574228987E-03 -0.117464392253343004E-05 0.178146677207011994E-01 -0.166666666666667011E-01 0.193887977549710017E-01 -0.125650168627745992E-02 -0.118157208932238010E-02 0.194637273908665996E-01 -0.139674680709097004E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041376249854009E-01 -0.235410513736612015E-07 -0.141584640574232999E-03 -0.117464392253384995E-05 0.178146677207011994E-01 -0.139700009140160993E-03 -0.166666666666667011E-01 0.172964151618227001E-01 -0.947777749626817048E-08 -0.141286602860218007E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.427075077777844986E-06 0.177495996071506991E-01 -0.166666666666667011E-01 -0.139670794784869993E-03 0.172964386794358001E-01 -0.239725105714824009E-07 -0.141640177118637010E-03 -0.121309559766888995E-05 0.178147059913013987E-01 -0.166666666666667011E-01 -0.435069295710136016E-03 0.193887241621244003E-01 -0.125642846904032001E-02 -0.435069488415573019E-03 -0.118150189715466995E-02 0.194636550484258014E-01 -0.140349307522104008E-03 0.178144320174323002E-01 -0.933958710839114049E-06 -0.166666666666667011E-01 -0.140349299171436005E-03 -0.933929273088272012E-06 0.178144299843184008E-01 -0.166666666666667011E-01 -0.435068659479091984E-03 0.193848776262358011E-01 -0.125250842953309989E-02 -0.435070124644940006E-03 -0.117767775442942997E-02 0.194597409762624005E-01 -0.166666666666667011E-01 0.172969944928271988E-01 -0.779115314441779059E-07 -0.166666666666667011E-01 -0.755324939016156960E-05 0.179004996942138013E-01 -0.166666666666667011E-01 0.178976999748929992E-01 -0.387667548981126021E-05 -0.233628102656007991E-06 0.178135930268625009E-01 -0.166666666666667011E-01 -0.139686009556357012E-03 
0.172964370559683007E-01 -0.241363115163189999E-07 -0.141728010347422987E-03 -0.122115135534191003E-05 0.178147135015842006E-01 -0.435039396134656023E-03 -0.166666666666667011E-01 0.193948601305157994E-01 -0.126336416797764991E-02 0.000000000000000000E+00 -0.435099385138650019E-03 -0.118873902082604994E-02 0.194708368874158995E-01 -0.166666666666667011E-01 -0.435069389255753991E-03 0.193884422504904987E-01 -0.125613821100521001E-02 -0.435069394869983992E-03 -0.118121665337718993E-02 0.194633639337775007E-01 -0.166666666666667011E-01 -0.139671380124092994E-03 0.172964743448792008E-01 -0.239764688220868006E-07 -0.141643188367998986E-03 -0.121320975071519003E-05 0.178147060922408014E-01 -0.141639844172888998E-03 0.178147058017989017E-01 -0.121290395200246995E-05 -0.166666666666667011E-01 -0.139670800627354991E-03 -0.239702482700813011E-07 0.172964743394126014E-01 -0.166666666666667011E-01 -0.435068235431626006E-03 0.193889053227481005E-01 -0.125663654535791008E-02 -0.435070548689864018E-03 -0.118172116532760008E-02 0.194638725033138005E-01 -0.161678337891911993E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964935301388005E-01 -0.110214153508716999E-06 -0.162422470311612014E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.120622187566448997E-05 0.173052150067069012E-01 0.000000000000000000E+00 -0.139675704206961997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.173040340098522989E-01 -0.235576501589924013E-07 -0.141591382930956008E-03 -0.117568258902886992E-05 0.178146687262424989E-01 -0.435067411942580010E-03 -0.166666666666667011E-01 0.193886625538398985E-01 -0.125640355926819004E-02 -0.435071372170706019E-03 -0.118150457761581010E-02 0.194636501951265008E-01 -0.166666666666667011E-01 -0.435069317670580980E-03 0.193887257892274983E-01 -0.125642966436278992E-02 -0.435069466455139981E-03 -0.118150274119342008E-02 0.194636560434883997E-01 -0.161601801934664006E-03 -0.166666666666667011E-01 0.172965054748792993E-01 
-0.926502681628198944E-07 -0.162351306455949001E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.101988138676949999E-05 0.173051393629537996E-01 0.000000000000000000E+00 -0.139695639988150999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041438368814991E-01 -0.237586532645376999E-07 -0.141701626147540003E-03 -0.118517343244098004E-05 0.178146776002247008E-01 -0.435031309394906995E-03 -0.166666666666667011E-01 0.193945788398839983E-01 -0.126324081474057004E-02 -0.435107470132360024E-03 -0.118873511395483993E-02 0.194708001084720993E-01 -0.435268203178909021E-03 -0.166666666666667011E-01 0.194803280841454006E-01 -0.120221471825894999E-02 -0.434870455402753989E-03 -0.126728100453280998E-02 0.194000950994309002E-01 -0.166666666666667011E-01 0.172950455023187996E-01 -0.209695226067755011E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.104309478839939992E-05 0.178040398395854990E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.173086281051332003E-01 -0.234055546297331013E-07 -0.116921175366159004E-05 0.178149613334783012E-01 -0.166666666666667011E-01 -0.141165531303238012E-03 0.172962108365329013E-01 -0.473445694664380988E-05 -0.544660594244451043E-03 -0.346550124406059992E-04 0.183333673483180998E-01 -0.139754726222278999E-03 -0.166666666666667011E-01 0.176170084219189997E-01 -0.246149781576928007E-07 -0.140206540519620008E-03 -0.567788699156871954E-06 0.178140588497450007E-01 -0.165095492820405989E-03 0.178223326710732002E-01 -0.681196695094317026E-05 -0.166666666666667011E-01 -0.150450799069272987E-03 -0.294765827682543002E-06 0.176166680355560011E-01 -0.166666666666667011E-01 -0.421169440873413019E-03 0.189193905894523007E-01 -0.945436157371731009E-03 -0.448291280314589975E-03 -0.100548420996315999E-02 0.192537236307863992E-01 -0.166666666666667011E-01 0.173199119681975988E-01 -0.235984477489150007E-07 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 
-0.123676178966298004E-05 0.178172740173286014E-01 -0.166666666666667011E-01 -0.435171428461128974E-03 0.194709389729267983E-01 -0.119092785894799004E-02 -0.434967322587667975E-03 -0.126074169224678996E-02 0.193933111644412001E-01 -0.166666666666667011E-01 0.172958818582512990E-01 -0.240054389194565998E-07 -0.121597096057541996E-05 0.178147088549035007E-01 -0.434490077563752020E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.192819871000558009E-01 -0.115810409202821997E-02 -0.435647599072524984E-03 -0.109302488139546004E-02 0.193702791629623006E-01 -0.435071227105071981E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.194623470555931009E-01 -0.118137412384183003E-02 -0.435067557009957988E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.125600599101201998E-02 0.193876331668423017E-01 -0.166666666666667011E-01 -0.139670755029073005E-03 0.172964393832697010E-01 -0.239720378568450991E-07 -0.141639940417128004E-03 -0.121307076941768003E-05 0.178147059675646015E-01 -0.166666666666667011E-01 -0.435069370632784017E-03 0.193887126423957987E-01 -0.125641513719206011E-02 -0.435069413492952990E-03 -0.118148776859817008E-02 0.194636409705596994E-01 -0.140350151817307989E-03 0.178144321253345984E-01 -0.934063912396124954E-06 -0.166666666666667011E-01 -0.140350134684648006E-03 -0.934057299894995007E-06 0.178144316718926998E-01 -0.166666666666667011E-01 -0.435068157528115975E-03 0.193838368298109992E-01 -0.125145454139508993E-02 -0.435070626592773998E-03 -0.117665452507550007E-02 0.194586918508348992E-01 -0.166666666666667011E-01 0.172970581684627998E-01 -0.692387039594140975E-18 -0.166666666666667011E-01 -0.116362368857371003E-16 0.173226956581742017E-01 -0.166666666666667011E-01 0.173227088035448999E-01 -0.227142423077076990E-07 -0.109342148655364995E-05 0.178145868761214014E-01 -0.166666666666667011E-01 -0.139672882575824002E-03 0.172964387861730996E-01 -0.239948730000151010E-07 
-0.141652224900244998E-03 -0.121419432935652996E-05 0.178147068681758017E-01 -0.435063160018721003E-03 -0.166666666666667011E-01 0.193888839247167016E-01 -0.125673333404555001E-02 0.000000000000000000E+00 -0.435075623983676026E-03 -0.118189505882440008E-02 0.194640013018383996E-01 -0.166666666666667011E-01 -0.435069389648734019E-03 0.000000000000000000E+00 0.193884419847699004E-01 -0.125613793152611996E-02 -0.435069394477004018E-03 -0.118121637448865007E-02 0.194633636498748007E-01 -0.166666666666667011E-01 -0.139670964419093003E-03 0.172964743771341009E-01 -0.239720016692284015E-07 -0.141640788952219994E-03 -0.121299004622935002E-05 0.178147058760876990E-01 -0.141639744525080005E-03 0.178147057920136007E-01 -0.121289465915329990E-05 -0.166666666666667011E-01 -0.139670783426588003E-03 -0.239700609664733988E-07 0.172964743758160996E-01 -0.166666666666667011E-01 -0.435069015984241008E-03 0.193887360934393016E-01 -0.125644691154631004E-02 -0.435069768141048007E-03 -0.118152418161517991E-02 0.194636756633727002E-01 -0.161605811424037006E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172965033247536003E-01 -0.935932238155109053E-07 -0.162355789101947996E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.103050516658616990E-05 0.173051502951346997E-01 0.000000000000000000E+00 -0.139674971988557002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041442833562001E-01 -0.235436840180659000E-07 -0.141586201352507011E-03 -0.117475642480550998E-05 0.178146678139698006E-01 -0.435068868942317015E-03 -0.166666666666667011E-01 0.193886960011524015E-01 -0.125640827839761994E-02 -0.435069915182551979E-03 -0.118148826831425001E-02 0.194636394345385003E-01 -0.166666666666667011E-01 -0.435069350378355024E-03 0.193887155593089992E-01 -0.125641852657909004E-02 -0.435069433747378026E-03 -0.118149137295133996E-02 0.194636445794403000E-01 -0.161601725677973997E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172965069829389001E-01 
-0.926650122837621983E-07 -0.162351498594983989E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.102023342811811993E-05 0.173051439965640000E-01 0.000000000000000000E+00 -0.139679244153132989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041479614229991E-01 -0.235877316475454016E-07 -0.141610020722497009E-03 -0.117688204166686002E-05 0.178146696122764014E-01 -0.435061149026493986E-03 -0.166666666666667011E-01 0.193888142839695986E-01 -0.125670280436314006E-02 -0.435077634883449980E-03 -0.118189407884959998E-02 0.194639921507386016E-01 -0.415397337630780993E-03 -0.166666666666667011E-01 0.188278985006655984E-01 -0.898332650045155024E-03 -0.453392799083327013E-03 -0.100006104572014989E-02 0.192410591480840984E-01 -0.166666666666667011E-01 0.173196351833987995E-01 -0.236101000931807013E-07 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.123805476906965002E-05 0.178172764972891993E-01 -0.166666666666667011E-01 0.172958803102313992E-01 -0.240055324643047990E-07 -0.121597905454717007E-05 0.178147088629614994E-01 -0.166666666666667011E-01 -0.139361332584411003E-03 0.172963780471655990E-01 -0.206406041294607990E-07 -0.139853947917575004E-03 -0.104886506051046009E-05 0.178144680355106985E-01 -0.482519288981585995E-03 -0.166666666666667011E-01 0.184437042794661984E-01 -0.182960367117892994E-03 -0.375083574717954013E-03 -0.210197986405779994E-03 0.183325256678834002E-01 -0.143207812762676998E-03 0.178148150557865988E-01 -0.128505524246536990E-05 -0.166666666666667011E-01 -0.199761155404034002E-03 -0.199498340007143991E-05 0.178294573883331017E-01 -0.166666666666667011E-01 -0.433603776206709980E-03 0.193497604591337004E-01 -0.124503388545287009E-02 -0.436528045694049990E-03 -0.119072564830830993E-02 0.194675615509681994E-01 -0.166666666666667011E-01 0.172951192308572997E-01 -0.214703230748565997E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.107141838335927994E-05 0.178058595670048010E-01 0.000000000000000000E+00 
0.000000000000000000E+00 -0.166666666666667011E-01 -0.434603197488181014E-03 0.000000000000000000E+00 0.193058537142562006E-01 -0.118063268562849000E-02 -0.435534875266554988E-03 -0.111369468800605006E-02 0.193919603585517991E-01 -0.166666666666667011E-01 0.173056689022044012E-01 -0.235558337256563013E-07 -0.118363055389575001E-05 0.178149857852359997E-01 -0.435171466645126008E-03 -0.166666666666667011E-01 0.194709518251613006E-01 -0.119094077856059005E-02 -0.434967284379077007E-03 -0.126075351291434000E-02 0.193933220870970008E-01 -0.435071725054391020E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.194634518078929016E-01 -0.118146952991411008E-02 -0.435067059054058004E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.125632584199591004E-02 0.193884910636192009E-01 -0.166666666666667011E-01 -0.139670745136767009E-03 0.172964396823254013E-01 -0.239719121263430998E-07 -0.141639880244726003E-03 -0.121306391322725994E-05 0.178147059609039017E-01 -0.166666666666667011E-01 -0.140348087747312988E-03 0.178144318030939997E-01 -0.933800582768691986E-06 -0.140348087981950005E-03 -0.933801410703608978E-06 0.178144318602838012E-01 -0.435069394684114985E-03 0.194636364171987992E-01 -0.118148312778892993E-02 -0.166666666666667011E-01 -0.435069389441622998E-03 -0.125641069437981007E-02 0.193887087332374995E-01 -0.166666666666667011E-01 -0.139670745366996012E-03 0.172964393825588009E-01 -0.239719343252174001E-07 -0.141639884660971991E-03 -0.121306568223363996E-05 0.178147059632162985E-01 -0.166666666666667011E-01 0.193887976553534999E-01 -0.125650164603376005E-02 -0.166666666666667011E-01 -0.118157208059822003E-02 0.194637273034572005E-01 -0.166666666666667011E-01 0.193887977225202998E-01 -0.125650167205010994E-02 -0.118157208887189995E-02 0.194637273866554994E-01 -0.166666666666667011E-01 -0.435069390858170975E-03 0.193887118569476997E-01 -0.125641385953187998E-02 -0.435069393267567008E-03 -0.118148620273378011E-02 
0.194636395676883005E-01 -0.139674686768931008E-03 -0.166666666666667011E-01 0.173041492305231991E-01 -0.235404449719397016E-07 0.000000000000000000E+00 -0.141584559768768005E-03 -0.117458831050516992E-05 0.178146676653484007E-01 -0.166666666666667011E-01 -0.139670743889800991E-03 0.000000000000000000E+00 0.172964393844355011E-01 -0.239719183336094004E-07 -0.141639876115250000E-03 -0.121306489168701010E-05 0.178147059624313986E-01 -0.166666666666667011E-01 -0.140348087304844010E-03 0.178144318418145993E-01 -0.933801043769264029E-06 -0.140348087380234008E-03 -0.933801309788676977E-06 0.178144318601899006E-01 -0.435069392936427975E-03 0.194636364108725993E-01 -0.118148311418031005E-02 -0.166666666666667011E-01 -0.435069391189310008E-03 -0.125641070624249993E-02 0.193887087796953995E-01 -0.166666666666667011E-01 -0.435069391192276006E-03 0.193887815134154012E-01 -0.125648507879496003E-02 -0.435069392933461977E-03 -0.118155584780088997E-02 0.194637107833945007E-01 -0.139700198050289988E-03 -0.166666666666667011E-01 0.172964151566850008E-01 -0.947849280649777055E-08 -0.141287467726875003E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.427106383900035001E-06 0.177495992954622012E-01 0.000000000000000000E+00 -0.140178145665099013E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177492248107081994E-01 -0.496797031657574988E-07 -0.140415043450978997E-03 -0.517779495462221951E-06 0.178140139541173997E-01 -0.435069391108214992E-03 -0.166666666666667011E-01 0.193887087862484007E-01 -0.125641071469571007E-02 -0.435069393017522990E-03 -0.118148312365616006E-02 0.194636364200341007E-01 -0.166666666666667011E-01 -0.139670745360696987E-03 0.172964393844495003E-01 -0.239719341341385992E-07 -0.141639884605172008E-03 -0.121306566857870991E-05 0.178147059632026983E-01 -0.435069390272270994E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887086899636012E-01 -0.125641067599288007E-02 -0.435069393853468019E-03 0.000000000000000000E+00 
-0.166666666666667011E-01 -0.118148311480717992E-02 0.194636363313261986E-01 0.000000000000000000E+00 -0.435069390212755017E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193887087459734993E-01 -0.125641069148615990E-02 -0.435069393912983020E-03 -0.118148311376017998E-02 0.194636364065413001E-01 -0.139670743899566996E-03 -0.166666666666667011E-01 0.172964393844273999E-01 -0.239719184390481014E-07 -0.141639876171704000E-03 -0.121306489688977996E-05 0.178147059624365993E-01 -0.435069391188922026E-03 -0.166666666666667011E-01 0.193887118630630996E-01 -0.125641385909724002E-02 -0.435069392936816011E-03 -0.118148619756053004E-02 0.194636395637322983E-01 -0.166666666666667011E-01 0.172965077542917012E-01 -0.927568247378449987E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.102046200973719009E-05 0.173051339160060005E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627737991E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.166666666666667011E-01 -0.435069391170655984E-03 0.193884418583357004E-01 -0.125613776952834995E-02 -0.435069392955081998E-03 -0.118121619344725010E-02 0.194633634743562010E-01 -0.139670761454458998E-03 -0.166666666666667011E-01 0.172964743993847016E-01 -0.239698237234471001E-07 -0.141639617486673003E-03 -0.121288294398923001E-05 0.178147057813330990E-01 -0.141639617486714989E-03 0.178147057813330990E-01 -0.121288294399298003E-05 -0.166666666666667011E-01 -0.139670761454465991E-03 -0.239698237234977997E-07 0.172964743993847016E-01 -0.166666666666667011E-01 -0.435069391188907986E-03 0.193887118630626000E-01 -0.125641385909704010E-02 -0.435069392936829997E-03 -0.118148619756053004E-02 0.194636395637322011E-01 -0.166666666666667011E-01 0.172965077542917012E-01 -0.927568247378397048E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.102046200973721995E-05 0.173051339160060005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.139674680709099010E-03 
0.000000000000000000E+00 0.173041376249854009E-01 -0.235410513737083011E-07 -0.141584640574244004E-03 -0.117464392253482002E-05 0.178146677207011994E-01 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627737991E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.139674680709099010E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041376249854009E-01 -0.235410513737084004E-07 -0.141584640574244004E-03 -0.117464392253482002E-05 0.178146677207011994E-01 -0.161601720191613987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172965076669214998E-01 -0.926701710590744007E-07 -0.162351552424590994E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.102033061970779005E-05 0.173051453907301991E-01 -0.166666666666667011E-01 -0.140025227157220011E-03 0.172964538609525001E-01 -0.280836077054835003E-07 -0.143700853603787992E-03 -0.141493042418215998E-05 0.178149166877635996E-01 -0.166666666666667011E-01 -0.434397138667670006E-03 0.193663283421353014E-01 -0.124681061178207996E-02 -0.435740193435873980E-03 -0.118155329481477991E-02 0.194610974716315013E-01 -0.435888399868572985E-03 0.193018909827777987E-01 -0.102736762162752003E-02 -0.166666666666667011E-01 -0.434248105255362999E-03 -0.108835030296548004E-02 0.192105826235978994E-01 -0.166666666666667011E-01 -0.142766725250475000E-03 0.173186084279148000E-01 -0.107223373775717997E-06 -0.158331602080537010E-03 -0.502583390911269042E-05 0.178188218385040009E-01 -0.166666666666667011E-01 0.191887710237797007E-01 -0.116955156282324006E-02 -0.166666666666667011E-01 -0.115321249659207001E-02 0.193095009677901989E-01 -0.166666666666667011E-01 0.192716415760125007E-01 -0.120400166465171009E-02 -0.117888093553793010E-02 0.194469246141813004E-01 -0.166666666666667011E-01 -0.139668701380672987E-03 0.172963839829421012E-01 -0.239535905300788984E-07 -0.141628653565274010E-03 -0.121228849838231003E-05 0.178147051859353991E-01 -0.435070871110831980E-03 
-0.166666666666667011E-01 0.194629509412433989E-01 -0.118084397985067997E-02 0.000000000000000000E+00 -0.435067913007957025E-03 -0.125568351559249996E-02 0.193879990665461986E-01 -0.166666666666667011E-01 -0.435069390650039015E-03 0.000000000000000000E+00 0.193884418750694017E-01 -0.125613779809562003E-02 -0.435069393475699022E-03 -0.118121622922623989E-02 0.194633635071832997E-01 -0.166666666666667011E-01 -0.139670790826866987E-03 0.172964743947118006E-01 -0.239701379014204989E-07 -0.141639787000618012E-03 -0.121289838766225000E-05 0.178147057914929997E-01 -0.141639653189466996E-03 0.178147057839792011E-01 -0.121288622158938009E-05 -0.166666666666667011E-01 -0.139670767633548004E-03 -0.239698902061346000E-07 0.172964743947353998E-01 -0.166666666666667011E-01 -0.435069338865947026E-03 0.193887012207536007E-01 -0.125640423249911005E-02 -0.435069445259782012E-03 -0.118147761261617004E-02 0.194636302225951012E-01 -0.161594071152451998E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172965071943530987E-01 -0.911198389499341023E-07 -0.162344730250092007E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.100404518995791990E-05 0.173051412553598995E-01 0.000000000000000000E+00 -0.139674707461267010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041614291120985E-01 -0.235399540925235988E-07 -0.141584554759037012E-03 -0.117453693858401004E-05 0.178146676069954012E-01 -0.435069368728934001E-03 -0.166666666666667011E-01 0.193887057094737983E-01 -0.125640804230569007E-02 -0.435069415396801976E-03 -0.118148083984231998E-02 0.194636339567480984E-01 -0.166666666666667011E-01 -0.435069379906770002E-03 0.193887120222560987E-01 -0.125641427645285992E-02 -0.435069404218967981E-03 -0.118148677817681990E-02 0.194636400648426008E-01 -0.161601688606437998E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172965075862324992E-01 -0.926646811008730053E-07 -0.162351530879915987E-03 0.000000000000000000E+00 -0.166666666666667011E-01 
-0.102027850567167008E-05 0.173051453753680015E-01 0.000000000000000000E+00 -0.139674509841842008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041492584409991E-01 -0.235385724457535989E-07 -0.141583570403258998E-03 -0.117449718296302996E-05 0.178146674600714004E-01 -0.435069784091323016E-03 -0.166666666666667011E-01 0.194629197189570002E-01 -0.118078836231266999E-02 -0.435069000033927022E-03 -0.125568468813201011E-02 0.193879970350075009E-01 -0.371043793435626003E-03 -0.166666666666667011E-01 0.180438947424557995E-01 -0.288534286733046986E-03 -0.486290336507850022E-03 -0.560900198964180987E-03 0.187957070663157012E-01 -0.166666666666667011E-01 0.177374439856716991E-01 -0.214133613745063999E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.267021979533145016E-03 0.179665664602573008E-01 -0.166666666666667011E-01 0.193465910019366000E-01 -0.123791262554833999E-02 -0.118077342883706003E-02 0.194573127658026999E-01 -0.166666666666667011E-01 -0.139258097457669987E-03 0.172965222480018985E-01 -0.139396279238899008E-07 -0.139258097457669987E-03 -0.716212299733630966E-06 0.178142098387169996E-01 -0.448746699171803979E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.187014712985399990E-01 -0.470424366568322023E-03 -0.420614096697062990E-03 -0.416028776742435001E-03 0.185800358532766004E-01 -0.143647797085317999E-03 0.178142596258036995E-01 -0.784777563840381003E-06 -0.166666666666667011E-01 -0.289432939916457006E-03 -0.392675356000534004E-05 0.178691669573259014E-01 -0.166666666666667011E-01 -0.433838074751410983E-03 0.193348477920875991E-01 -0.122492972126329001E-02 -0.436295777836767011E-03 -0.116751654000400999E-02 0.194447290230824014E-01 -0.166666666666667011E-01 0.173080151788632987E-01 -0.244742519208693003E-04 0.000000000000000000E+00 -0.166666666666667011E-01 -0.133996458919445003E-03 0.178636488290151996E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.139757971205888996E-03 
0.000000000000000000E+00 0.173348621828142008E-01 -0.229201529246951995E-07 -0.141718205587288988E-03 -0.107549290031851995E-05 0.178145692111312988E-01 -0.166666666666667011E-01 0.193848567630524014E-01 -0.125477293338469996E-02 -0.118151635484324991E-02 0.194632155620537983E-01 -0.140442473096456008E-03 -0.166666666666667011E-01 0.173484955878829006E-01 -0.294133603610693987E-07 -0.144782161123405996E-03 -0.133037267977017010E-05 0.178148273618939992E-01 -0.435071708554150990E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.194637484731423997E-01 -0.118167963393035996E-02 -0.435067075554543010E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.125644827298293993E-02 0.193888013904018990E-01 -0.166666666666667011E-01 -0.143972797957216006E-03 0.173505484374359004E-01 -0.481359947088405975E-04 -0.558385337699288022E-03 -0.438375372304428026E-03 0.187218716475071011E-01 -0.166666666666667011E-01 -0.139937861588156992E-03 0.174078894816094010E-01 -0.227986700629730998E-07 -0.141801880833368992E-03 -0.908696204756822957E-06 0.178144027207976993E-01 -0.143597614564550997E-03 0.178145584123696014E-01 -0.106316430421145007E-05 -0.166666666666667011E-01 -0.140417723829495007E-03 -0.268131472654641992E-07 0.174078938787147995E-01 -0.166666666666667011E-01 -0.434071246446273990E-03 0.193423068719833988E-01 -0.122826345821061989E-02 -0.436064307367708980E-03 -0.116769061492820003E-02 0.194456917758701990E-01 -0.166666666666667011E-01 0.172982030065722998E-01 -0.280949763883982991E-07 -0.166666666666667011E-01 -0.189517763026067993E-05 0.178271977135994013E-01 -0.166666666666667011E-01 0.178265008932220990E-01 -0.134657557070520009E-05 -0.925862840555429982E-06 0.178144080134048990E-01 -0.166666666666667011E-01 -0.434780858347314024E-03 0.193789566829400996E-01 -0.125221572856264000E-02 -0.435357659993625004E-03 -0.118148634697722000E-02 0.194624944824026008E-01 -0.139674508339166013E-03 -0.166666666666667011E-01 
0.173041495432516984E-01 -0.235385803949071996E-07 0.000000000000000000E+00 -0.141583560353484997E-03 -0.117449727533117009E-05 0.178146675901048983E-01 -0.166666666666667011E-01 -0.139670744554657995E-03 0.172964393825064990E-01 -0.239719255799683993E-07 -0.141639879972026009E-03 -0.121306525215112003E-05 0.178147059627193002E-01 -0.166666666666667011E-01 -0.140348090038086001E-03 0.178144318165064999E-01 -0.933801051164943002E-06 -0.140348090679100993E-03 -0.933801691611406014E-06 0.178144318606493005E-01 -0.435069394145568998E-03 0.194636343488718000E-01 -0.118148110868380009E-02 -0.166666666666667011E-01 -0.435069389980168985E-03 -0.125640863633868995E-02 0.193887067279608985E-01 -0.166666666666667011E-01 -0.435069389466851027E-03 0.193887808952679999E-01 -0.125648448071444003E-02 -0.435069394658887010E-03 -0.118155528731557990E-02 0.194637102049162007E-01 -0.139700012758421009E-03 -0.166666666666667011E-01 0.172964152304735004E-01 -0.947803391050105932E-08 0.000000000000000000E+00 -0.141286629556396993E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.427087720755819975E-06 0.177496011881496003E-01 0.000000000000000000E+00 -0.140176041318299010E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177492266992976995E-01 -0.496642740357883019E-07 -0.140412389918406994E-03 -0.517602927754366013E-06 0.178140137848851986E-01 -0.435070897570947004E-03 -0.166666666666667011E-01 0.194636050822587989E-01 -0.118148880805278990E-02 -0.435067886547593015E-03 -0.125633531890120007E-02 0.193886425635053997E-01 -0.166666666666667011E-01 -0.139669237663479995E-03 0.172964396325578015E-01 -0.239557312619153985E-07 -0.141631179766294000E-03 -0.121226845353324010E-05 0.178147051921049987E-01 -0.435070355283276994E-03 -0.166666666666667011E-01 0.194636498694088986E-01 -0.118156237601487000E-02 -0.435068428839514019E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.125638673489450003E-02 
0.193887513139221999E-01 0.000000000000000000E+00 -0.435071250841686976E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.194636876903557000E-01 -0.118157615129436001E-02 -0.435067533273078989E-03 -0.125640800692181000E-02 0.193887125978184013E-01 -0.139670568327848998E-03 -0.166666666666667011E-01 0.172964396537817998E-01 -0.239700195293086008E-07 -0.141638860134678002E-03 -0.121297096046396990E-05 0.178147058843058995E-01 -0.435090508141796988E-03 -0.166666666666667011E-01 0.194669892927517990E-01 -0.118530039028566991E-02 -0.435048274568398019E-03 -0.125913754523842994E-02 0.193915247098193008E-01 -0.166666666666667011E-01 0.172960984745374989E-01 -0.213609391405068995E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.106367312891359006E-05 0.178056546914439999E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.173098191165470990E-01 -0.233466429807202985E-07 -0.116342624068746999E-05 0.178149495945450001E-01 -0.166666666666667011E-01 -0.435069713252911978E-03 0.194633204071541013E-01 -0.118118203409811997E-02 -0.435069070872497979E-03 -0.125608526726808992E-02 0.193883924273124987E-01 -0.139659157492490009E-03 -0.166666666666667011E-01 0.172964811115927999E-01 -0.238453400472707005E-07 -0.141572592558481008E-03 -0.120675012864579991E-05 0.178147008765491000E-01 -0.141538194636943009E-03 0.178146978498745015E-01 -0.120361727918045010E-05 -0.166666666666667011E-01 -0.139653196804214987E-03 -0.237816257834915005E-07 0.172964810525967003E-01 -0.166666666666667011E-01 -0.435101710749684009E-03 0.194659176105031993E-01 -0.118451620585209995E-02 -0.435037070057877023E-03 -0.125773914717291005E-02 0.193902105162142985E-01 -0.166666666666667011E-01 0.172962783354724016E-01 -0.218342668490586010E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.109032448231275997E-05 0.178073744716654991E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.434677413069518002E-03 0.000000000000000000E+00 0.193010409680071000E-01 
-0.117438144583154004E-02 -0.435460867464983019E-03 -0.110662888765544992E-02 0.193848162965538015E-01 -0.166666666666667011E-01 0.173066829056823003E-01 -0.235026937709481004E-07 -0.117850668810495007E-05 0.178149746842341007E-01 -0.434571088710914019E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.192763238295202005E-01 -0.115086473839517999E-02 -0.435566874653087015E-03 -0.108492248964389996E-02 0.193620926343772010E-01 -0.161599193400744999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964883975278011E-01 -0.923842913120058025E-07 -0.162350867043699006E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.101863211158590009E-05 0.173051440367491989E-01 -0.166666666666667011E-01 -0.435069857911570023E-03 0.194633765819712985E-01 -0.118123987997616003E-02 -0.435068926213479003E-03 -0.125613733340555994E-02 0.193884432348929991E-01 -0.166666666666667011E-01 -0.139666077646513989E-03 0.172964743925189991E-01 -0.239195397112311000E-07 -0.141612586198800003E-03 -0.121041044884479009E-05 0.178147032886921998E-01 -0.141624350828127994E-03 0.178147048678200005E-01 -0.121149428933203998E-05 -0.166666666666667011E-01 -0.139668115474050994E-03 -0.239415570468690011E-07 0.172964744463235016E-01 -0.166666666666667011E-01 -0.435074201017785996E-03 0.194493813662804001E-01 -0.116767077284960006E-02 -0.435064583034155982E-03 -0.124201442563575998E-02 0.193746726576543987E-01 -0.166666666666667011E-01 0.173081303334150990E-01 -0.244868683853623015E-04 -0.166666666666667011E-01 -0.133924491696816997E-03 0.178635877956252014E-01 -0.166666666666667011E-01 0.173348654764515983E-01 -0.223062641979707998E-07 -0.104752868600600992E-05 0.178145411980304008E-01 -0.166666666666667011E-01 -0.435078250390669987E-03 0.194615680563809991E-01 -0.117967106973176005E-02 -0.435060533485638016E-03 -0.125408416347905010E-02 0.193864744856198992E-01 -0.139507036577244005E-03 -0.166666666666667011E-01 
0.173052664826232015E-01 -0.217638922762432993E-07 0.000000000000000000E+00 -0.140641592938044988E-03 -0.108598665496889000E-05 0.178145535889315011E-01 -0.166666666666667011E-01 -0.139669898838359002E-03 0.172964394567505994E-01 -0.239628351519636015E-07 -0.141634997663802990E-03 -0.121261808844727004E-05 0.178147055141749007E-01 -0.166666666666667011E-01 -0.140550312154719988E-03 0.178144664524045003E-01 -0.935528049442035979E-06 -0.140352831077276009E-03 -0.934435514417231040E-06 0.178144323723362016E-01 -0.435067799539816001E-03 0.193882988824195003E-01 -0.125598375468558010E-02 -0.166666666666667011E-01 -0.435070984577867010E-03 -0.118114930153955010E-02 0.194632560036250013E-01 -0.166666666666667011E-01 -0.435066606879862022E-03 0.193887645192647000E-01 -0.125653020767648002E-02 -0.435072177221240990E-03 -0.118164241777987005E-02 0.194637770279468998E-01 -0.139525322491481995E-03 -0.166666666666667011E-01 0.172964018029032988E-01 -0.889114618565385951E-08 0.000000000000000000E+00 -0.140487892558472998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.401772060623745995E-06 0.177503217815941985E-01 0.000000000000000000E+00 -0.139680840737954994E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177499929679008003E-01 -0.461429010297917004E-07 -0.139808785077553992E-03 -0.474216741479885991E-06 0.178139754223665993E-01 -0.435490707175785019E-03 -0.166666666666667011E-01 0.194038024150119011E-01 -0.113136000392566002E-02 -0.434647496176783011E-03 -0.118521045340098008E-02 0.193210677155511990E-01 -0.166666666666667011E-01 -0.139356047055132996E-03 0.172964913696200991E-01 -0.205948470749521995E-07 -0.139855780445013013E-03 -0.104675229678023004E-05 0.178145500428797988E-01 -0.435278470720009021E-03 -0.166666666666667011E-01 0.194122813378182005E-01 -0.114346492577733008E-02 -0.434860167096455020E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.119664955180343990E-02 
0.193456144785291995E-01 0.000000000000000000E+00 -0.435507216634916999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.194759659828977992E-01 -0.120235295974980010E-02 -0.434630954199676010E-03 -0.125613199041933006E-02 0.193898784861901013E-01 -0.139505276818091995E-03 -0.166666666666667011E-01 0.172964538951550992E-01 -0.222158902483321995E-07 -0.140684567793642991E-03 -0.112653151422048998E-05 0.178145939457222992E-01 -0.142980894339064004E-03 -0.166666666666667011E-01 0.173314540736370995E-01 -0.109865934611584997E-06 -0.158534103555776988E-03 -0.501444248842295042E-05 0.178188430592681017E-01 -0.166666666666667011E-01 0.191833679361077016E-01 -0.116695965850981001E-02 0.000000000000000000E+00 -0.166666666666667011E-01 -0.115255810703607004E-02 0.193073017106333011E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.172969314941423015E-01 -0.239425929155972008E-07 -0.121051376505055003E-05 0.178147034232610003E-01 -0.166666666666667011E-01 -0.435705239999077025E-03 0.190415627118516000E-01 -0.780602612534949010E-03 -0.434432059542404973E-03 -0.823587259891796967E-03 0.189719852118625000E-01 -0.147270631146331993E-03 -0.166666666666667011E-01 0.174669495140968994E-01 -0.100890785221647000E-03 -0.564704614718581016E-03 -0.477370877328551027E-03 0.187551209753295985E-01 -0.139439405915424988E-03 0.178140970873433002E-01 -0.575941857140058983E-06 -0.166666666666667011E-01 -0.139258097457669987E-03 -0.129861811110761993E-07 0.173691987550076005E-01 -0.166666666666667011E-01 -0.437167563287824015E-03 0.195228637533087016E-01 -0.126248812464285006E-02 -0.432956764514017976E-03 -0.125491318191218006E-02 0.193932789336802996E-01 -0.166666666666667011E-01 0.172966662190472999E-01 -0.121413434559184010E-06 0.000000000000000000E+00 -0.166666666666667011E-01 -0.972291627814046991E-05 0.179398884476568016E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.350105792758167024E-03 0.000000000000000000E+00 0.179358412564323000E-01 
-0.564926759529481039E-05 -0.144639853954665995E-03 -0.314581136116586985E-06 0.178138319445949010E-01 -0.166666666666667011E-01 0.193703856784436003E-01 -0.124844632146220002E-02 -0.118120734927350994E-02 0.194604805412335984E-01 -0.431301131288118977E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.192675040792685999E-01 -0.119991065467065991E-02 -0.438790367540799003E-03 -0.117513383093279010E-02 0.194438480854111986E-01 -0.160851119958975013E-03 -0.166666666666667011E-01 0.172967403401987005E-01 -0.176667226349783012E-07 -0.161653225232298001E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.204715316608505000E-06 0.173054598817215001E-01 -0.166666666666667011E-01 -0.435630521898672989E-03 0.193599440196800983E-01 -0.109077905797679009E-02 -0.434507214005793005E-03 -0.113826801298030997E-02 0.192764807436608988E-01 -0.166666666666667011E-01 -0.139536670190099001E-03 0.173126649368885999E-01 -0.217199105747271010E-07 -0.140761521185631995E-03 -0.106837091099441006E-05 0.178145668888614987E-01 -0.559739137192076996E-03 0.187342109887928994E-01 -0.453812468447541010E-03 -0.166666666666667011E-01 -0.144437867213115988E-03 -0.570571543832924991E-04 0.173662394826720017E-01 -0.166666666666667011E-01 -0.141554480273217009E-03 0.173902726296936011E-01 -0.612977187708155032E-05 -0.543269256103985979E-03 -0.155576690925164998E-04 0.183343607413548014E-01 -0.166666666666667011E-01 0.190435224881784007E-01 -0.109812319463792004E-02 -0.166666666666667011E-01 -0.111910148431190000E-02 0.192002058717034990E-01 -0.166666666666667011E-01 0.191810336689013997E-01 -0.116112314020396998E-02 -0.117547931931091007E-02 0.194335300141191998E-01 -0.166666666666667011E-01 -0.435330233904635021E-03 0.194710205889397983E-01 -0.119426072742205005E-02 -0.434808333182332976E-03 -0.125626108720282010E-02 0.193894446405454002E-01 -0.140176366522793013E-03 -0.166666666666667011E-01 
0.177492265794550012E-01 -0.496668406936042969E-07 0.000000000000000000E+00 -0.140412799232444004E-03 -0.517630044729356947E-06 0.178140137898683999E-01 -0.166666666666667011E-01 -0.435069391185710023E-03 0.193884418604889988E-01 -0.125613777143264004E-02 -0.435069392940028014E-03 -0.118121619509623009E-02 0.194633634760910008E-01 -0.166666666666667011E-01 -0.139670763696215990E-03 0.172964743991942983E-01 -0.239698478178091986E-07 -0.141639630426265001E-03 -0.121288412910185994E-05 0.178147057825197991E-01 -0.141639615804873989E-03 0.178147057811398994E-01 -0.121288279038434004E-05 -0.166666666666667011E-01 -0.139670761162715996E-03 -0.239698205914474004E-07 0.172964743991641003E-01 -0.166666666666667011E-01 -0.435069386965028005E-03 0.193887089999775983E-01 -0.125641102065624996E-02 -0.435069397160709978E-03 -0.118148348379683993E-02 0.194636367651009994E-01 -0.139670105523366001E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964392990828005E-01 -0.239365198406219005E-07 -0.141635362805994999E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121107464035148993E-05 0.178146126504961991E-01 0.000000000000000000E+00 -0.435064986808901988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885562632203010E-01 -0.125634370176468005E-02 -0.435073797255201974E-03 -0.118148076820924009E-02 0.194636164222987997E-01 -0.139670495544523001E-03 -0.166666666666667011E-01 0.172964394121789995E-01 -0.239692490633551009E-07 -0.141638442382618007E-03 -0.121293358252935993E-05 0.178147058331974001E-01 -0.166666666666667011E-01 -0.435069391760568977E-03 0.193887087038398000E-01 -0.125641061773689995E-02 -0.435069392365169006E-03 -0.118148301966637995E-02 0.194636363154463005E-01 -0.139670535416434004E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964393658350986E-01 -0.239411949136597000E-07 -0.141637844578005988E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121130492256924999E-05 0.178146130952495999E-01 0.000000000000000000E+00 
-0.435064996378640002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885572616325011E-01 -0.125634453230068005E-02 -0.435073787685731975E-03 -0.118148144433873992E-02 0.194636171468661988E-01 -0.139670065580594011E-03 -0.166666666666667011E-01 0.172964393185634988E-01 -0.239646313460681002E-07 -0.141635961417651987E-03 -0.121270668854361004E-05 0.178147055904161987E-01 -0.434952598750324988E-03 -0.166666666666667011E-01 0.193531525591995983E-01 -0.122237998046724992E-02 -0.435186141504625024E-03 -0.114985437531919007E-02 0.194307215840397000E-01 -0.166666666666667011E-01 0.173112910791177986E-01 -0.253865277378717003E-04 0.000000000000000000E+00 -0.166666666666667011E-01 -0.138259363976047007E-03 0.178692219185750990E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193634106903348008E-01 -0.124536794862644003E-02 -0.118108829657000993E-02 0.194595580860945985E-01 -0.166666666666667011E-01 -0.435070677392060000E-03 0.194631053165025998E-01 -0.118099345597813001E-02 -0.435068106728430011E-03 -0.125584229113298996E-02 0.193881574845413997E-01 -0.139730287993371005E-03 -0.166666666666667011E-01 0.172965129995373001E-01 -0.246134321847894014E-07 -0.141982715458390012E-03 -0.124443001739989997E-05 0.178147384368371013E-01 -0.141918001872325990E-03 0.178147321831834994E-01 -0.123839758886950008E-05 -0.166666666666667011E-01 -0.139719073147956993E-03 -0.244906574666029988E-07 0.172965128605244989E-01 -0.166666666666667011E-01 -0.434974886831416001E-03 0.193565238209871988E-01 -0.122539181987882991E-02 -0.435163868606227981E-03 -0.115249078341496999E-02 0.194335198475120013E-01 -0.166666666666667011E-01 0.173108110594775988E-01 -0.252486229488127016E-04 0.000000000000000000E+00 -0.166666666666667011E-01 -0.137635746082909991E-03 0.178684051236191015E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.140103968298785998E-03 0.000000000000000000E+00 0.173357792454707990E-01 -0.263227882027079996E-07 -0.143405882709686988E-03 
-0.122767817853369000E-05 0.178147220760386006E-01 -0.166666666666667011E-01 0.193641798191389013E-01 -0.124570775063777002E-02 -0.118110177773261002E-02 0.194596602746646002E-01 -0.140117598274002998E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.173359378634117996E-01 -0.264605452289240990E-07 -0.143470065352009998E-03 -0.123349701123618009E-05 0.178147283869216008E-01 -0.139699946083262993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964334581704009E-01 -0.947724450009741998E-08 -0.141286163109395011E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.427036318179888015E-06 0.177496010208582009E-01 -0.166666666666667011E-01 -0.435115619333969008E-03 0.188851999358167003E-01 -0.625665684806612967E-03 -0.435023156520779992E-03 -0.676915827627737983E-03 0.188324700803031986E-01 -0.166666666666667011E-01 -0.139445450795412993E-03 0.174062618030441998E-01 -0.190292129986386985E-07 -0.139963260485348001E-03 -0.765758079538392050E-06 0.178142630176348002E-01 -0.532898920971015019E-03 0.187657876890418990E-01 -0.512856477472066019E-03 -0.166666666666667011E-01 -0.274567299328894003E-03 -0.183874244209534995E-03 0.177330719708947010E-01 -0.166666666666667011E-01 -0.142934406904232013E-03 0.173279989505247985E-01 -0.109940048199017001E-06 -0.158611614219584011E-03 -0.505928267052957010E-05 0.178189142473271986E-01 -0.166666666666667011E-01 0.191837857182392997E-01 -0.116715933513682007E-02 -0.166666666666667011E-01 -0.115261551048108000E-02 0.193075009319374005E-01 -0.166666666666667011E-01 0.192715504217954997E-01 -0.120392392385177999E-02 -0.117898318247606007E-02 0.194477863404796010E-01 -0.166666666666667011E-01 -0.435541716493012012E-03 0.194766610463366005E-01 -0.120362759000766000E-02 -0.434596353391778010E-03 -0.125584159818315009E-02 0.193897157787361017E-01 -0.435053772892073011E-03 -0.166666666666667011E-01 0.193916866829711015E-01 -0.125980517520123993E-02 0.000000000000000000E+00 
-0.435085010459558010E-03 -0.118504095738582995E-02 0.194671524144862003E-01 -0.166666666666667011E-01 -0.139670776269951012E-03 0.172964393783541989E-01 -0.239722661869433015E-07 -0.141640063065600003E-03 -0.121308200605217006E-05 0.178147059781510013E-01 -0.166666666666667011E-01 -0.435069330991406000E-03 0.193887306635154993E-01 -0.125643445841802995E-02 -0.435069453134320002E-03 -0.118150726982913000E-02 0.194636605867374995E-01 -0.140346027430110010E-03 0.178144315994148983E-01 -0.933532497936932975E-06 -0.166666666666667011E-01 -0.140345973839162006E-03 -0.933513688391224014E-06 0.178144303098709995E-01 -0.166666666666667011E-01 -0.139688634719759995E-03 0.172964363725470992E-01 -0.241646447299009009E-07 -0.141743168416124011E-03 -0.122254521804739008E-05 0.178147146809073016E-01 -0.435059870297027000E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.193910369733675987E-01 -0.125956508747497001E-02 -0.435078913540753987E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118493531740329009E-02 0.194660938610287000E-01 0.000000000000000000E+00 -0.435045710174179019E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193875426795061985E-01 -0.125569402526470994E-02 -0.435093072169510985E-03 -0.118112078544253991E-02 0.194631751705502017E-01 -0.139419879842034002E-03 -0.166666666666667011E-01 0.172964832727320016E-01 -0.213161584771942993E-07 -0.140230214929088989E-03 -0.108242332728665990E-05 0.178145871202937990E-01 -0.166666666666667011E-01 -0.435070648727867019E-03 0.194635173256733005E-01 -0.118139703309711993E-02 -0.435068135392855987E-03 -0.125625483745250995E-02 0.193885626932315015E-01 -0.139434374869669994E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964444175748994E-01 -0.849107878174372025E-08 -0.140068522154330002E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.383811097196054990E-06 0.177500579653602011E-01 0.000000000000000000E+00 -0.140208513794569996E-03 -0.166666666666667011E-01 
0.000000000000000000E+00 0.177497183126398006E-01 -0.504600381193170004E-07 -0.140451251998329987E-03 -0.520324482162036022E-06 0.178140160927331009E-01 -0.435048849909038004E-03 -0.166666666666667011E-01 0.193915158406327004E-01 -0.125973026995557995E-02 -0.435089932877583986E-03 -0.118503856970240008E-02 0.194671300235447001E-01 -0.435388199753574995E-03 -0.166666666666667011E-01 0.194580790950913997E-01 -0.118263497490539004E-02 -0.434750258397955996E-03 -0.124178531210186001E-02 0.193754724111637015E-01 -0.166666666666667011E-01 0.173081480358036992E-01 -0.244895773689186006E-04 0.000000000000000000E+00 -0.166666666666667011E-01 -0.133922698934494993E-03 0.178635891343980995E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193849898137051016E-01 -0.125483132565947994E-02 -0.118151827368068998E-02 0.194632328928430985E-01 -0.166666666666667011E-01 -0.142602728611152993E-03 0.172960572372911001E-01 -0.117224517748718006E-06 -0.158587369045204001E-03 -0.567482454003337982E-05 0.178189579824735989E-01 -0.139720580884210006E-03 -0.166666666666667011E-01 0.177036794314483006E-01 -0.306504126162400988E-07 -0.139932852514187012E-03 -0.499375406901578031E-06 0.178139944442497998E-01 -0.546160778925738975E-03 0.184421024561669013E-01 -0.894165394996089942E-04 -0.166666666666667011E-01 -0.147442848397534993E-03 -0.240246210218283011E-04 0.177137736588957005E-01 -0.166666666666667011E-01 -0.400413710403848991E-03 0.186079235147509001E-01 -0.750380380436909983E-03 -0.465646679664707009E-03 -0.925379800643584029E-03 0.191586075383834986E-01 -0.166666666666667011E-01 0.177029162142693999E-01 -0.193968097316907988E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.245143979550067003E-03 0.179475102930650998E-01 -0.166666666666667011E-01 -0.140401212229714989E-03 0.173460971401947009E-01 -0.290712390925789007E-07 -0.144639789824670004E-03 -0.132223121416674009E-05 0.178148169771682983E-01 -0.166666666666667011E-01 0.193485878399233015E-01 
-0.123880031430750989E-02 -0.118081271609363995E-02 0.194575794108941998E-01 -0.139755721134129007E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.173348680988957005E-01 -0.228988835753830997E-07 -0.141707074016602990E-03 -0.107451126571115990E-05 0.178145682418863992E-01 -0.139419950330123000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964916757554002E-01 -0.212265241915647985E-07 -0.140190964958966001E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.107696768404692002E-05 0.178142558899385996E-01 -0.166666666666667011E-01 -0.143972832788044007E-03 0.173505501235237010E-01 -0.481361186863988992E-04 -0.558385355651620026E-03 -0.438374207210451001E-03 0.187218708288803999E-01 -0.166666666666667011E-01 -0.139941367226140996E-03 0.174078891209918998E-01 -0.228259192699078984E-07 -0.141815000445607008E-03 -0.909747619509022010E-06 0.178144035116544995E-01 -0.143597630564741997E-03 0.178145584149138995E-01 -0.106316648366329002E-05 -0.166666666666667011E-01 -0.140417726652687994E-03 -0.268131811843163990E-07 0.174078935255252004E-01 -0.166666666666667011E-01 -0.434071240888807990E-03 0.193423066886313985E-01 -0.122826337417718994E-02 -0.436064312888945010E-03 -0.116769060685687994E-02 0.194456917479709016E-01 -0.166666666666667011E-01 0.172982030071983997E-01 -0.280949763384092005E-07 -0.166666666666667011E-01 -0.189517762455052993E-05 0.178271977135736996E-01 -0.166666666666667011E-01 0.178265008932003005E-01 -0.134657556992853991E-05 -0.925862840600532052E-06 0.178144080134048990E-01 -0.166666666666667011E-01 -0.434776065018225015E-03 0.193791533646124996E-01 -0.125252599578476996E-02 -0.435362444422488025E-03 -0.118186375443277993E-02 0.194628395316310994E-01 -0.140181958380352012E-03 -0.166666666666667011E-01 0.177492492651218983E-01 -0.497367681715309972E-07 0.000000000000000000E+00 -0.140419741913999988E-03 -0.518097724412703002E-06 0.178140141000072000E-01 
-0.166666666666667011E-01 -0.435069320521429986E-03 0.193884456522794985E-01 -0.125614319532225006E-02 -0.435069463604292005E-03 -0.118122256043727992E-02 0.194633694896899009E-01 -0.166666666666667011E-01 -0.139670889730207013E-03 0.172964735664216003E-01 -0.239712525939313013E-07 -0.141640366257597987E-03 -0.121295504750436001E-05 0.178147058424521984E-01 -0.141657676433903007E-03 0.178147071049579997E-01 -0.121453449198612004E-05 -0.166666666666667011E-01 -0.139673889655509000E-03 -0.240033947134963004E-07 0.172964735804191985E-01 -0.166666666666667011E-01 -0.435069155034934006E-03 0.193887186536986986E-01 -0.125642600972210999E-02 -0.435069629090626005E-03 -0.118150164198114007E-02 0.194636536348553983E-01 -0.139672279648728003E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964368900011996E-01 -0.239116221440321008E-07 -0.141646519897784998E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.120943312255959009E-05 0.178144561861929007E-01 0.000000000000000000E+00 -0.435057590298097978E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193887674296497996E-01 -0.125672688730438004E-02 -0.435081193385279013E-03 -0.118196873889853007E-02 0.194640540700679009E-01 -0.139679127767890996E-03 -0.166666666666667011E-01 0.172963729770981983E-01 -0.240508992733560004E-07 -0.141688504708434011E-03 -0.121696226254563010E-05 0.178146602550901985E-01 -0.166666666666667011E-01 -0.435063688039146997E-03 0.193888428280263002E-01 -0.125667755640774008E-02 -0.435075095983264025E-03 -0.118183170521553998E-02 0.194639441233023999E-01 -0.139679366822631002E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172963909933037986E-01 -0.238904374634853986E-07 -0.141684903032989010E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.120762219986995010E-05 0.178141341272457995E-01 0.000000000000000000E+00 -0.435044663859789993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193878878622800004E-01 -0.125607230703734000E-02 
-0.435094118323148986E-03 -0.118150745952062992E-02 0.194635591497266010E-01 -0.139672170490270990E-03 -0.166666666666667011E-01 0.172964346444417008E-01 -0.239875039867713010E-07 -0.141648157868298996E-03 -0.121384147158235009E-05 0.178147065643662998E-01 -0.435090496096461013E-03 -0.166666666666667011E-01 0.194669889560541987E-01 -0.118529978595172006E-02 -0.435048286615349021E-03 -0.125913755896959003E-02 0.193915246813111015E-01 -0.166666666666667011E-01 0.172960984732630010E-01 -0.213609392236131997E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.106367313562377998E-05 0.178056546915008017E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.173098191165334987E-01 -0.233466429813017999E-07 -0.116342624074817007E-05 0.178149495945451007E-01 -0.166666666666667011E-01 -0.435066797805972990E-03 0.193883041569685999E-01 -0.125604896999190001E-02 -0.435071986298389026E-03 -0.118116640188501007E-02 0.194633028406514010E-01 -0.139659163853909999E-03 -0.166666666666667011E-01 0.172964806846983010E-01 -0.238454355521033004E-07 -0.141572633531096987E-03 -0.120675578133157002E-05 0.178147008816651985E-01 -0.141538304155846992E-03 0.178146978602927997E-01 -0.120362914934832999E-05 -0.166666666666667011E-01 -0.139653215055702006E-03 -0.237818478129986986E-07 0.172964806257772004E-01 -0.166666666666667011E-01 -0.435101676794290987E-03 0.194659166622981016E-01 -0.118451450992946000E-02 -0.435037104020240977E-03 -0.125773918484995990E-02 0.193902104354441995E-01 -0.166666666666667011E-01 0.172962783320088007E-01 -0.218342670137599985E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.109032449721754008E-05 0.178073744715902017E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.434677413065023984E-03 0.000000000000000000E+00 0.193010409686150998E-01 -0.117438144655306010E-02 -0.435460867469465978E-03 -0.110662888842583997E-02 0.193848162973076985E-01 -0.166666666666667011E-01 0.173066829055725999E-01 -0.235026937765211008E-07 
-0.117850668861997998E-05 0.178149746842344997E-01 -0.434571088713672988E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.192763238297126993E-01 -0.115086473852541002E-02 -0.435566874650336990E-03 -0.108492248973003007E-02 0.193620926345084016E-01 -0.139708981740112000E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172963531511250990E-01 -0.951377049798812932E-08 -0.141328242724759995E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.428708426390729985E-06 0.177495866410289013E-01 -0.166666666666667011E-01 -0.435067939506500023E-03 0.193883890526193002E-01 -0.125611318806702994E-02 -0.435070844612536994E-03 -0.118121301451965996E-02 0.194633542459594008E-01 -0.166666666666667011E-01 -0.139664557957332988E-03 0.172964746611408010E-01 -0.239032606349812988E-07 -0.141603814090319997E-03 -0.120960968951782991E-05 0.178147026032577996E-01 -0.141639613175400988E-03 0.178147057784965000E-01 -0.121288106723124010E-05 -0.166666666666667011E-01 -0.139670761286573002E-03 -0.239698004783898005E-07 0.172964747236447995E-01 -0.166666666666667011E-01 -0.435069392365180986E-03 0.194637108092800999E-01 -0.118155587635168993E-02 -0.435069391760556996E-03 -0.125648510526804006E-02 0.193887815570613006E-01 -0.166666666666667011E-01 0.172964151549298006E-01 -0.947849091229853943E-08 -0.166666666666667011E-01 -0.427106302281655022E-06 0.177495992959328004E-01 -0.166666666666667011E-01 0.177492248112503005E-01 -0.496796927091242970E-07 -0.517779383578906051E-06 0.178140139540073003E-01 -0.166666666666667011E-01 -0.435081057490304974E-03 0.194615601858393016E-01 -0.117973336574162002E-02 -0.435057726202830975E-03 -0.125399377351724994E-02 0.193864040661351003E-01 -0.139666404223663996E-03 -0.166666666666667011E-01 0.173052378719510992E-01 -0.233928603649598996E-07 0.000000000000000000E+00 -0.141527802484294987E-03 -0.116498797383902993E-05 0.178146582832621990E-01 -0.166666666666667011E-01 
-0.139669857630702005E-03 0.172964394671825013E-01 -0.239623924701885009E-07 -0.141634759721171003E-03 -0.121259630273193999E-05 0.178147054943494990E-01 -0.166666666666667011E-01 -0.140555314552680993E-03 0.178144675283670000E-01 -0.934892070952238007E-06 -0.140347413389892010E-03 -0.933745297868121043E-06 0.178144318010057985E-01 -0.435067722986518012E-03 0.193882964019922999E-01 -0.125598089226015993E-02 -0.166666666666667011E-01 -0.435071061130372014E-03 -0.118115048591624990E-02 0.194632553716142000E-01 -0.166666666666667011E-01 -0.435069777018943008E-03 0.194636048172950014E-01 -0.118146131478344993E-02 -0.435069007106324973E-03 -0.125636789473562000E-02 0.193886685693907997E-01 -0.139690885024778004E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964302280185014E-01 -0.950505359387174008E-08 -0.141247379876703009E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.428634134774421003E-06 0.177500186390967991E-01 0.000000000000000000E+00 -0.139677196785329999E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177496423494717011E-01 -0.457508561380249031E-07 -0.139805680326738989E-03 -0.473830440639680986E-06 0.178139751639271003E-01 -0.435493815113026025E-03 -0.166666666666667011E-01 0.194038726929433997E-01 -0.113147962792324000E-02 -0.434644379614391009E-03 -0.118519815382812010E-02 0.193210650330794993E-01 -0.166666666666667011E-01 -0.139355161737402005E-03 0.172964917336673983E-01 -0.205843400324393006E-07 -0.139851518567708007E-03 -0.104623713006894006E-05 0.178145496587001004E-01 -0.435276969900176027E-03 -0.166666666666667011E-01 0.194123409556366984E-01 -0.114355309658945004E-02 -0.434861669984602990E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.119672311455852002E-02 0.193457917807220992E-01 0.000000000000000000E+00 -0.435510762531134978E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.194758550382455008E-01 -0.120231103100156000E-02 -0.434627398233908020E-03 
-0.125592048119514991E-02 0.193896956616587991E-01 -0.139662076715157995E-03 -0.166666666666667011E-01 0.172964848493273003E-01 -0.238760385660033009E-07 -0.141589392729580995E-03 -0.120824848732212997E-05 0.178147013421607013E-01 -0.435069416060821000E-03 -0.166666666666667011E-01 0.194636372101129017E-01 -0.118148444615658998E-02 -0.435069368064916007E-03 -0.125641081782009996E-02 0.193887089888112006E-01 -0.166666666666667011E-01 0.172964393423152007E-01 -0.239438465741853011E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121143525352024005E-05 0.178146131933784001E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964393844348002E-01 -0.239719133473768010E-07 -0.121306464651348996E-05 0.178147059621872987E-01 -0.166666666666667011E-01 -0.435085659385655980E-03 0.194638181335962009E-01 -0.118203438802173995E-02 -0.435053123899394999E-03 -0.125612026912269993E-02 0.193884809592554004E-01 -0.139670749044936993E-03 -0.166666666666667011E-01 0.172964759607952996E-01 -0.239695886158460013E-07 -0.141639529816893012E-03 -0.121286786314684000E-05 0.178147057662445991E-01 -0.141639434640671995E-03 0.178147057572842007E-01 -0.121285914937942990E-05 -0.166666666666667011E-01 -0.139670732553277014E-03 -0.239694113970498999E-07 0.172964759605990989E-01 -0.166666666666667011E-01 -0.435069447260804027E-03 0.194636380955877999E-01 -0.118148603206807003E-02 -0.435069336864923981E-03 -0.125641079713200007E-02 0.193887090761660011E-01 -0.166666666666667011E-01 0.172964393436225994E-01 -0.239438464742388014E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121143524546324995E-05 0.178146131932993002E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.435064994893337995E-03 0.000000000000000000E+00 0.193885567753522003E-01 -0.125634406111458990E-02 -0.435073789170992999E-03 -0.118148100326300994E-02 0.194636166968780999E-01 -0.166666666666667011E-01 0.172964393844214012E-01 -0.239719133482090010E-07 -0.121306464658341994E-05 
0.178147059621873993E-01 -0.435064994896760008E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885567753732009E-01 -0.125634406106166999E-02 -0.435073789167570986E-03 -0.118148100316007990E-02 0.194636166967958983E-01 -0.160852226257028993E-03 -0.166666666666667011E-01 0.172967522287055010E-01 -0.175965589473195009E-07 -0.161650188610159987E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.203407230368285005E-06 0.173054266458900000E-01 -0.166666666666667011E-01 -0.435068952667249981E-03 0.193884313397972990E-01 -0.125613577038263011E-02 -0.435069831457874994E-03 -0.118122048876310006E-02 0.194633662905455002E-01 -0.166666666666667011E-01 -0.139672097032745988E-03 0.172964738095684016E-01 -0.239841856617656007E-07 -0.141647330681414008E-03 -0.121359018892023999E-05 0.178147064035918991E-01 -0.141624421727624012E-03 0.178147048779919992E-01 -0.121150370057904998E-05 -0.166666666666667011E-01 -0.139668126615946994E-03 -0.239417188530348007E-07 0.172964738011583997E-01 -0.166666666666667011E-01 -0.435074178605607004E-03 0.194493807353944004E-01 -0.116766964883922007E-02 -0.435064605447021003E-03 -0.124201443598458009E-02 0.193746725929095989E-01 -0.166666666666667011E-01 0.173081303330447009E-01 -0.244868682847540015E-04 -0.166666666666667011E-01 -0.133924491202099991E-03 0.178635877949816016E-01 -0.166666666666667011E-01 0.173348654763293003E-01 -0.223062642015643999E-07 -0.104752868644245000E-05 0.178145411980309004E-01 -0.166666666666667011E-01 -0.435066924736750998E-03 0.193888351972144012E-01 -0.125659460890097997E-02 -0.435071859369655009E-03 -0.118170026599907991E-02 0.194638400334165990E-01 -0.139784932051159995E-03 -0.166666666666667011E-01 0.177494904443947013E-01 -0.465951791816761021E-07 0.000000000000000000E+00 -0.139919920565161006E-03 -0.483545322941620994E-06 0.178139539234989999E-01 -0.166666666666667011E-01 -0.435069373706029993E-03 0.193884428434829004E-01 
-0.125613915345877008E-02 -0.435069410419707014E-03 -0.118121780684916997E-02 0.194633650118269004E-01 -0.166666666666667011E-01 -0.139672737807363003E-03 0.172964741998577996E-01 -0.239910227158871999E-07 -0.141651023984394999E-03 -0.121392525480250999E-05 0.178147066608579999E-01 -0.141640091614955997E-03 0.178147058209647997E-01 -0.121292720982870991E-05 -0.166666666666667011E-01 -0.139670843240139006E-03 -0.239707145982510011E-07 0.172964741884388991E-01 -0.166666666666667011E-01 -0.435065762561208998E-03 0.193887058048313986E-01 -0.125648894894022007E-02 -0.435073021522692984E-03 -0.118161486512369002E-02 0.194637424061168997E-01 -0.139514021925975012E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964189492996988E-01 -0.222147158226326007E-07 -0.140733410708959988E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.112570816185458990E-05 0.178142652186864016E-01 0.000000000000000000E+00 -0.435053679420991009E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193881873511392994E-01 -0.125619573968341011E-02 -0.435085103920473015E-03 -0.118149860448210999E-02 0.194635877202401011E-01 -0.139671537483522003E-03 -0.166666666666667011E-01 0.172964363686913987E-01 -0.239806171193915986E-07 -0.141644487121937989E-03 -0.121349920180918992E-05 0.178147063153519004E-01 -0.166666666666667011E-01 -0.435069239606207980E-03 0.193887146861679988E-01 -0.125642003417049007E-02 -0.435069544519456981E-03 -0.118149450125770001E-02 0.194636470456143011E-01 -0.139671620203623008E-03 -0.166666666666667011E-01 0.172964378382645008E-01 -0.239229803350932009E-07 -0.141643246303753000E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121015091751334009E-05 0.178145160915291999E-01 0.000000000000000000E+00 -0.435060404326353023E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885964889095007E-01 -0.125648696022081000E-02 -0.435078379542822014E-03 -0.118169041399521007E-02 0.194637954413787000E-01 
-0.139513918934682001E-03 -0.166666666666667011E-01 0.172964088839709998E-01 -0.223091853795638985E-07 -0.140734728439723009E-03 -0.113122244037598008E-05 0.178145985581429009E-01 -0.142980894195082010E-03 -0.166666666666667011E-01 0.173314539890347015E-01 -0.109866000013417994E-06 -0.158534109366701998E-03 -0.501444632546631029E-05 0.178188430632978990E-01 -0.166666666666667011E-01 0.191833678841652998E-01 -0.116695963416674989E-02 0.000000000000000000E+00 -0.166666666666667011E-01 -0.115255809689225998E-02 0.193073016724906003E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.172969314941213009E-01 -0.239425929167627016E-07 -0.121051376515920001E-05 0.178147034232611010E-01 -0.166666666666667011E-01 -0.435672877594180986E-03 0.190410730770498998E-01 -0.779817530267083964E-03 -0.434464569145039006E-03 -0.823623092840541026E-03 0.189719892294968988E-01 -0.147270553185889990E-03 -0.166666666666667011E-01 0.174669464297294991E-01 -0.100890525843168994E-03 -0.564704630029780048E-03 -0.477372644101759012E-03 0.187551220717942987E-01 -0.139439417619302005E-03 0.178140970993464001E-01 -0.575952369555323042E-06 -0.166666666666667011E-01 -0.139258097457669987E-03 -0.129863402849587998E-07 0.173691959079540006E-01 -0.166666666666667011E-01 -0.437167519508436999E-03 0.195228624474010001E-01 -0.126248680382140001E-02 -0.432956808905804989E-03 -0.125491318727090004E-02 0.193932788356039991E-01 -0.166666666666667011E-01 0.172966662193905010E-01 -0.121413434668133993E-06 0.000000000000000000E+00 -0.166666666666667011E-01 -0.972291627722414967E-05 0.179398884478061994E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.350105795458797975E-03 0.000000000000000000E+00 0.179358412564393013E-01 -0.564926743849099038E-05 -0.144639853703507987E-03 -0.314581126948723974E-06 0.178138319445937006E-01 -0.166666666666667011E-01 0.193703856826768009E-01 -0.124844632332796993E-02 -0.118120734934456010E-02 0.194604805418001001E-01 -0.431301130210841007E-03 
0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.192675040489116989E-01 -0.119991064076258990E-02 -0.438790368590655981E-03 -0.117513383015891001E-02 0.194438480812311985E-01 -0.139700882958903997E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172963963463645017E-01 -0.952678627103132059E-08 -0.141292751882909993E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.429511932811560008E-06 0.177498907292438987E-01 -0.166666666666667011E-01 -0.435115740933037984E-03 0.188852022804432002E-01 -0.625668905066499950E-03 -0.435023034878147990E-03 -0.676916698779699965E-03 0.188324708841117010E-01 -0.166666666666667011E-01 -0.139446455078796010E-03 0.174062615201245989E-01 -0.190374494518299010E-07 -0.139967041096937007E-03 -0.766079367871497027E-06 0.178142633180293011E-01 -0.532899100111402005E-03 0.187657876777795017E-01 -0.512856379296787999E-03 -0.166666666666667011E-01 -0.274566803915548998E-03 -0.183873918065665000E-03 0.177330707817049994E-01 -0.166666666666667011E-01 -0.142934407180007997E-03 0.173279989208002007E-01 -0.109940092441766996E-06 -0.158611617255726990E-03 -0.505928492620163000E-05 0.178189142493529011E-01 -0.166666666666667011E-01 0.191837856777059002E-01 -0.116715931604291999E-02 -0.166666666666667011E-01 -0.115261550320967993E-02 0.193075009050143985E-01 -0.166666666666667011E-01 0.192715504007066017E-01 -0.120392391412785997E-02 -0.117898318183162006E-02 0.194477863374666986E-01 -0.166666666666667011E-01 -0.435539737045843010E-03 0.194769265765691001E-01 -0.120385500091470992E-02 -0.434598338907063995E-03 -0.125615984296152997E-02 0.193900185880402989E-01 -0.435045514645459013E-03 -0.166666666666667011E-01 0.193914099137533005E-01 -0.125968948646175004E-02 0.000000000000000000E+00 -0.435093267670908987E-03 -0.118504666533010995E-02 0.194671248940259008E-01 -0.166666666666667011E-01 -0.139670791769998004E-03 0.172964393766727002E-01 -0.239724329252018989E-07 
-0.141640152553009998E-03 -0.121309020944367992E-05 0.178147059866567009E-01 -0.166666666666667011E-01 -0.435069301629178995E-03 0.193887307626826010E-01 -0.125643515612083000E-02 -0.435069482496533021E-03 -0.118150837470912999E-02 0.194636615934981015E-01 -0.140348157921430997E-03 0.178144318702273992E-01 -0.933811502946043969E-06 -0.166666666666667011E-01 -0.140348150101401000E-03 -0.933783910990536035E-06 0.178144299643035985E-01 -0.166666666666667011E-01 -0.139689488845466003E-03 0.172964362338874009E-01 -0.241738769572596000E-07 -0.141748099793582007E-03 -0.122299932206489994E-05 0.178147151092019999E-01 -0.435059916514922017E-03 -0.166666666666667011E-01 0.193909887083489017E-01 -0.125955268129309002E-02 0.000000000000000000E+00 -0.435078867325628023E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118493749346423004E-02 0.194660289027400006E-01 0.000000000000000000E+00 -0.435044083341360984E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193878554391240000E-01 -0.125604934766983993E-02 -0.435094698749261975E-03 -0.118149272049225995E-02 0.194635444483519000E-01 -0.139670978031983007E-03 -0.166666666666667011E-01 0.172964365263250999E-01 -0.239746177115109000E-07 -0.141641256924305989E-03 -0.121320404495395004E-05 0.178147060925782016E-01 -0.166666666666667011E-01 -0.435069350149236000E-03 0.193887875206789004E-01 -0.125649208526834003E-02 -0.435069433976496020E-03 -0.118156330259116009E-02 0.194637181802562002E-01 -0.139700455923195987E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964141750712001E-01 -0.947598200812492946E-08 -0.141288502520823991E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.426976078434958026E-06 0.177495760715989000E-01 0.000000000000000000E+00 -0.140209281941629005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177492016410450010E-01 -0.499125605911364029E-07 -0.140454288917710998E-03 -0.520385719864523011E-06 0.178140161378314010E-01 -0.435047474599787026E-03 
-0.166666666666667011E-01 0.193914777822561009E-01 -0.125971925119921992E-02 -0.435091308001488001E-03 -0.118504760094160990E-02 0.194671336437930997E-01 -0.435388198571750988E-03 -0.166666666666667011E-01 0.194580790627339011E-01 -0.118263492130232990E-02 -0.434750259582202979E-03 -0.124178531317212996E-02 0.193754724084427010E-01 -0.166666666666667011E-01 0.173081480357068010E-01 -0.244895773575376999E-04 0.000000000000000000E+00 -0.166666666666667011E-01 -0.133922698986129010E-03 0.178635891344391985E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193849898136359000E-01 -0.125483132562911989E-02 -0.118151827367967994E-02 0.194632328928340016E-01 -0.166666666666667011E-01 -0.142602747236407002E-03 0.172960559757211994E-01 -0.117227408613948994E-06 -0.158587582136584995E-03 -0.567497435835260990E-05 0.178189581300063002E-01 -0.139720580991034007E-03 -0.166666666666667011E-01 0.177036782302840007E-01 -0.306502532037200970E-07 -0.139932855252091994E-03 -0.499376182467009957E-06 0.178139944449866999E-01 -0.546160785305606963E-03 0.184421025597138015E-01 -0.894167397427545998E-04 -0.166666666666667011E-01 -0.147442821218409995E-03 -0.240244287551271008E-04 0.177137723141376005E-01 -0.166666666666667011E-01 -0.400413696341282016E-03 0.186079233649920003E-01 -0.750380296538879967E-03 -0.465646690569393017E-03 -0.925379786421282957E-03 0.191586075172780999E-01 -0.166666666666667011E-01 0.177029162002804996E-01 -0.193968089936665005E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.245143973844672025E-03 0.179475102895724006E-01 -0.166666666666667011E-01 -0.140401212232880995E-03 0.173460971398785996E-01 -0.290712391494978008E-07 -0.144639789846284008E-03 -0.132223121759111006E-05 0.178148169771744011E-01 -0.166666666666667011E-01 0.193485878396606990E-01 -0.123880031419069990E-02 -0.118081271608876993E-02 0.194575794108615002E-01 -0.139755721135340007E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 
0.173348680988911000E-01 -0.228988835869186992E-07 -0.141707074022604998E-03 -0.107451126624275990E-05 0.178145682418867010E-01 -0.139671204369828987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964527346999991E-01 -0.238206247996137016E-07 -0.141637869293981998E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.120424066353106008E-05 0.178141993541914984E-01 -0.166666666666667011E-01 -0.435293328042460988E-03 0.188969200156620006E-01 -0.638325692737096986E-03 -0.434845262794537977E-03 -0.686401912695584044E-03 0.188415679710590006E-01 -0.166666666666667011E-01 -0.139450260715297009E-03 0.174035477820680996E-01 -0.190790185253572011E-07 -0.139988379398403009E-03 -0.772546665100815971E-06 0.178142695812380999E-01 -0.535400152422887033E-03 0.187643522383528008E-01 -0.509589395081856949E-03 -0.166666666666667011E-01 -0.267380606913843026E-03 -0.178487068761980011E-03 0.177166953062542001E-01 -0.166666666666667011E-01 -0.142812108775019998E-03 0.173285847433682014E-01 -0.101845505783431005E-06 -0.158027855199464992E-03 -0.469583501144497018E-05 0.178185786022877005E-01 -0.166666666666667011E-01 0.191928643172403005E-01 -0.117144676995123995E-02 -0.166666666666667011E-01 -0.115407859919587998E-02 0.193128636528600017E-01 -0.166666666666667011E-01 0.192743847992538006E-01 -0.120526566976207009E-02 -0.117896448649838000E-02 0.194473150974792015E-01 -0.166666666666667011E-01 -0.435531752463573995E-03 0.194769704025496010E-01 -0.120375472484332991E-02 -0.434606347505284997E-03 -0.125643108232954001E-02 0.193902401795961014E-01 -0.435044468242076017E-03 -0.166666666666667011E-01 0.193871801282229986E-01 -0.125535484215398997E-02 0.000000000000000000E+00 -0.435094313909530008E-03 -0.118080938637835000E-02 0.194628399719979014E-01 -0.166666666666667011E-01 -0.139670871572238991E-03 0.172964393487799011E-01 -0.239732892684089006E-07 -0.141640613379916011E-03 -0.121313235466317992E-05 0.178147060195563006E-01 
-0.166666666666667011E-01 -0.435069156529857982E-03 0.193887053622597005E-01 -0.125641208621197001E-02 -0.435069627595703981E-03 -0.118148788386283009E-02 0.194636401109469989E-01 -0.140357102716023998E-03 0.178144327480211015E-01 -0.934953983487829978E-06 -0.166666666666667011E-01 -0.140357082074367998E-03 -0.934881658679828005E-06 0.178144277579760993E-01 -0.166666666666667011E-01 -0.139671248276079991E-03 0.172964387342726990E-01 -0.239772668643883999E-07 -0.141642790859008988E-03 -0.121332835924946005E-05 0.178147058633731994E-01 -0.435073048109683026E-03 -0.166666666666667011E-01 0.194629028419433005E-01 -0.118081642190051008E-02 -0.435065735973592016E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.125566691830197993E-02 0.193878887757382989E-01 0.000000000000000000E+00 -0.435064995098833989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193890263762267999E-01 -0.125684228342590990E-02 -0.435073788965510016E-03 -0.118197542174123009E-02 0.194640901826458011E-01 -0.139679109099943003E-03 -0.166666666666667011E-01 0.172963725697189014E-01 -0.240507205201775987E-07 -0.141688401087265996E-03 -0.121695435575037005E-05 0.178146602328799995E-01 -0.166666666666667011E-01 -0.435064159301769998E-03 0.193889744409617010E-01 -0.125680005533066992E-02 -0.435074624737013995E-03 -0.118194373351253996E-02 0.194640651337368992E-01 -0.139708967713254010E-03 -0.166666666666667011E-01 0.172963616356281991E-01 -0.951324367715890009E-08 -0.141328092009716008E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.428676150398848976E-06 0.177495851017135005E-01 0.000000000000000000E+00 -0.140184006518702988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177492476417109996E-01 -0.497520252347835031E-07 -0.140422322794949995E-03 -0.518268041627564046E-06 0.178140140868086015E-01 -0.435065432183330027E-03 -0.166666666666667011E-01 0.193879055107330014E-01 -0.125567283667538999E-02 
-0.435073351892596022E-03 -0.118081969348847004E-02 0.194629358214771994E-01 -0.371043726257224002E-03 -0.166666666666667011E-01 0.180438943525561002E-01 -0.288534096345938022E-03 -0.486290379332660018E-03 -0.560900084076317983E-03 0.187957069582666003E-01 -0.166666666666667011E-01 0.177374439328937990E-01 -0.214133585214810996E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.267021959071755976E-03 0.179665664489489993E-01 -0.166666666666667011E-01 0.193465909995582004E-01 -0.123791262449076994E-02 -0.118077342878954009E-02 0.194573127654812002E-01 -0.166666666666667011E-01 -0.139258097457669987E-03 0.172965161025564994E-01 -0.139408267359583998E-07 -0.139258097457669987E-03 -0.716280208726827027E-06 0.178142098987628013E-01 -0.448745061591116986E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.187014625769068993E-01 -0.470417770715891002E-03 -0.420615920070051990E-03 -0.416030208289801002E-03 0.185800388700857987E-01 -0.143647586674343006E-03 0.178142597734856009E-01 -0.784926914613626987E-06 -0.166666666666667011E-01 -0.289422112454956976E-03 -0.392634159972045978E-05 0.178691591832313007E-01 -0.166666666666667011E-01 -0.433838061791930977E-03 0.193348473865251995E-01 -0.122492954566329995E-02 -0.436295790691586000E-03 -0.116751653970171990E-02 0.194447289756928994E-01 -0.166666666666667011E-01 0.173080151777424002E-01 -0.244742517923652000E-04 0.000000000000000000E+00 -0.166666666666667011E-01 -0.133996459548231008E-03 0.178636488295263011E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.139757971222783007E-03 0.000000000000000000E+00 0.173348621827651983E-01 -0.229201530844114011E-07 -0.141718205670910005E-03 -0.107549290769848001E-05 0.178145692111379012E-01 -0.166666666666667011E-01 0.193848567620656005E-01 -0.125477293295166007E-02 -0.118151635482893996E-02 0.194632155619245996E-01 -0.140442473137899012E-03 -0.166666666666667011E-01 0.173484955870056995E-01 -0.294133609017818009E-07 
-0.144782161335538988E-03 -0.133037270602234991E-05 0.178148273619183999E-01 -0.139679362257274988E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964143215187001E-01 -0.238876115774515004E-07 -0.141684595140730989E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.120742002760442999E-05 0.178141299090062988E-01 -0.166666666666667011E-01 -0.139670790012750987E-03 0.172964393796226010E-01 -0.239724130311146011E-07 -0.141640142355812013E-03 -0.121308921736943007E-05 0.178147059830380990E-01 -0.166666666666667011E-01 -0.140356055867897991E-03 0.178144308211048014E-01 -0.934793117616238968E-06 -0.140356063365803011E-03 -0.934819410434324011E-06 0.178144326353903010E-01 -0.435069477705365992E-03 0.194636362328108985E-01 -0.118148328063351999E-02 -0.166666666666667011E-01 -0.435069306420349006E-03 -0.125640963064629994E-02 0.193887060480272995E-01 -0.166666666666667011E-01 -0.139670746102450000E-03 0.172964393573767014E-01 -0.239719438513679986E-07 -0.141639889164474000E-03 -0.121306620727653000E-05 0.178147059636829010E-01 -0.166666666666667011E-01 0.193887976067478006E-01 -0.125650162570866992E-02 -0.166666666666667011E-01 -0.118157207781601002E-02 0.194637272757679988E-01 -0.166666666666667011E-01 0.193887977046092001E-01 -0.125650166419828007E-02 -0.118157208862118990E-02 0.194637273843102990E-01 -0.166666666666667011E-01 -0.435064765067235998E-03 0.193889522097447989E-01 -0.125676448751559002E-02 -0.435074018990514973E-03 -0.118190001722009996E-02 0.194640239141652995E-01 -0.140182361260136993E-03 -0.166666666666667011E-01 0.177493225039018002E-01 -0.498169409981402992E-07 0.000000000000000000E+00 -0.140419962900806993E-03 -0.518131012618996021E-06 0.178140141298323007E-01 -0.166666666666667011E-01 -0.435069323598363008E-03 0.193884453764502987E-01 -0.125614284670238011E-02 -0.435069460527360988E-03 -0.118122217360852990E-02 0.194633691143310007E-01 -0.166666666666667011E-01 -0.139670884426293997E-03 
0.172964736171049016E-01 -0.239711924341550007E-07 -0.141640335129922002E-03 -0.121295197632909002E-05 0.178147058397751003E-01 -0.141656453358155989E-03 0.178147070060390988E-01 -0.121442246005117002E-05 -0.166666666666667011E-01 -0.139673677795910993E-03 -0.240011176184461992E-07 0.172964736295889016E-01 -0.166666666666667011E-01 -0.435069164848058006E-03 0.193887179318738001E-01 -0.125642505656990006E-02 -0.435069619277515991E-03 -0.118150056235400997E-02 0.194636526000948017E-01 -0.139672366859797993E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964370349440004E-01 -0.239153496141802001E-07 -0.141647103173812004E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.120964050265571001E-05 0.178144653667999010E-01 0.000000000000000000E+00 -0.435058020368536008E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193887196651586005E-01 -0.125666847383042006E-02 -0.435080763346489007E-03 -0.118190508477706993E-02 0.194639922621226008E-01 -0.139641248248434992E-03 -0.166666666666667011E-01 0.172963810772421012E-01 -0.236457807710709016E-07 -0.141469789611806004E-03 -0.119702907967459002E-05 0.178146434627204017E-01 -0.166666666666667011E-01 -0.435064079024843019E-03 0.193888050589719994E-01 -0.125663030165246007E-02 -0.435074705011247002E-03 -0.118177958688183999E-02 0.194638936988976996E-01 -0.139641455576729011E-03 -0.166666666666667011E-01 0.172963976004947007E-01 -0.234988035352734987E-07 -0.141466765709830994E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118846831550546996E-05 0.178141535165701990E-01 0.000000000000000000E+00 -0.435046365527533990E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193879446262342006E-01 -0.125609596684336995E-02 -0.435092416913644979E-03 -0.118150619229764994E-02 0.194635647529617997E-01 -0.139672261692974013E-03 -0.166666666666667011E-01 0.172964349185803015E-01 -0.239884652187998984E-07 -0.141648681442598987E-03 -0.121388810715753001E-05 0.178147066071378986E-01 
-0.435069380083255997E-03 -0.166666666666667011E-01 0.193887114872825991E-01 -0.125641369962899998E-02 -0.435069404042481985E-03 -0.118148620112529007E-02 0.194636395221940012E-01 -0.166666666666667011E-01 0.172965077531164017E-01 -0.927568178720202994E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.102046202213369007E-05 0.173051339160117008E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887977533772002E-01 -0.125650168557878009E-02 -0.118157208930007003E-02 0.194637273906578985E-01 -0.166666666666667011E-01 -0.435066628630517986E-03 0.193883490811541000E-01 -0.125609889832018992E-02 -0.435072155470964984E-03 -0.118121788505638997E-02 0.194633537877374994E-01 -0.139670767304303994E-03 -0.166666666666667011E-01 0.172964740023511983E-01 -0.239699122889327989E-07 -0.141639655324462003E-03 -0.121288819251166995E-05 0.178147057860663996E-01 -0.141639719254381006E-03 0.178147057910134007E-01 -0.121289402770353995E-05 -0.166666666666667011E-01 -0.139670778383217008E-03 -0.239700310192222999E-07 0.172964740024197997E-01 -0.166666666666667011E-01 -0.435069359721298977E-03 0.193887107978404985E-01 -0.125641340675414995E-02 -0.435069424404436024E-03 -0.118148620716793996E-02 0.194636394455750990E-01 -0.166666666666667011E-01 0.172965077510262993E-01 -0.927568056586548005E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.102046204410674010E-05 0.173051339160213008E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.139674680780886991E-03 0.000000000000000000E+00 0.173041376249668984E-01 -0.235410521181961013E-07 -0.141584640975313996E-03 -0.117464395864609995E-05 0.178146677207322995E-01 -0.166666666666667011E-01 0.193887977504637009E-01 -0.125650168430156001E-02 -0.118157208925927996E-02 0.194637273902763010E-01 -0.139674680734473000E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041376249788992E-01 -0.235410516367698016E-07 -0.141584640716005010E-03 -0.117464393529904008E-05 
0.178146677207122010E-01 -0.139669171022264988E-03 -0.166666666666667011E-01 0.172963674074106989E-01 -0.936361616323369989E-08 -0.141145821435217987E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.422126237945786021E-06 0.177496556211017009E-01 -0.166666666666667011E-01 -0.139670831173391990E-03 0.172964378151377017E-01 -0.239729563898503003E-07 -0.141640396000908991E-03 -0.121311945483194002E-05 0.178147060102482983E-01 -0.166666666666667011E-01 -0.140348082156047988E-03 0.178144284125189012E-01 -0.933754616232120984E-06 -0.140348096313297011E-03 -0.933804570413684980E-06 0.178144318631820002E-01 -0.435069554992271979E-03 0.194636358566133011E-01 -0.118148321892500995E-02 -0.166666666666667011E-01 -0.435069229133381978E-03 -0.125640843271979000E-02 0.193887033474266987E-01 -0.166666666666667011E-01 -0.139670746039409987E-03 0.172964393604615012E-01 -0.239719429806933989E-07 -0.141639888769111005E-03 -0.121306615757996010E-05 0.178147059636586010E-01 -0.166666666666667011E-01 0.193887976105126016E-01 -0.125650162733728990E-02 -0.166666666666667011E-01 -0.118157207791531990E-02 0.194637272767340004E-01 -0.166666666666667011E-01 0.193887977049908011E-01 -0.125650166436534001E-02 -0.118157208862703007E-02 0.194637273843653001E-01 -0.166666666666667011E-01 -0.435069387915653001E-03 0.193887117581042004E-01 -0.125641381798179000E-02 -0.435069396210086012E-03 -0.118148620435052005E-02 0.194636395574343014E-01 -0.139674686777477991E-03 -0.166666666666667011E-01 0.173041492300615996E-01 -0.235404450870160989E-07 0.000000000000000000E+00 -0.141584559821053004E-03 -0.117458831712666003E-05 0.178146676653543994E-01 -0.166666666666667011E-01 -0.139670743890679005E-03 0.172964393844352998E-01 -0.239719183429222001E-07 -0.141639876120318997E-03 -0.121306489215045994E-05 0.178147059624318010E-01 -0.166666666666667011E-01 -0.140348087305803990E-03 0.178144318417805016E-01 -0.933801043435799969E-06 
-0.140348087381334013E-03 -0.933801309950123998E-06 0.178144318601900983E-01 -0.435069392938043002E-03 0.194636364111164008E-01 -0.118148311443157998E-02 -0.166666666666667011E-01 -0.435069391187694980E-03 -0.125641070647420998E-02 0.193887087798863995E-01 -0.166666666666667011E-01 -0.435069391191658010E-03 0.193887815134855014E-01 -0.125648507888060008E-02 -0.435069392934080027E-03 -0.118155584789411011E-02 0.194637107834846994E-01 -0.139700198059465998E-03 -0.166666666666667011E-01 0.172964151566727987E-01 -0.947849280924833965E-08 -0.141287467767569990E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.427106383838012019E-06 0.177495992952378008E-01 0.000000000000000000E+00 -0.140178145950843993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177492248104791014E-01 -0.496797052902892032E-07 -0.140415043811154997E-03 -0.517779519367424970E-06 0.178140139541318986E-01 -0.435069390908532979E-03 -0.166666666666667011E-01 0.193887088082677003E-01 -0.125641074170613001E-02 -0.435069393217205004E-03 -0.118148315311942002E-02 0.194636364485415991E-01 -0.166666666666667011E-01 -0.139670745499349992E-03 0.172964393844174009E-01 -0.239719356219104995E-07 -0.141639885405697995E-03 -0.121306574176735002E-05 0.178147059632627995E-01 -0.435069390223737009E-03 -0.166666666666667011E-01 0.193887087097285998E-01 -0.125641070240488004E-02 -0.435069393902000974E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118148314340501995E-02 0.194636363510784990E-01 0.000000000000000000E+00 -0.435069390007495975E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193887087390940994E-01 -0.125641068860798000E-02 -0.435069394118242007E-03 -0.118148311389442004E-02 0.194636364058402012E-01 -0.139670743908197002E-03 -0.166666666666667011E-01 0.172964393843969000E-01 -0.239719185336623003E-07 -0.141639876221824010E-03 -0.121306490160525007E-05 0.178147059624407002E-01 -0.435069391188885001E-03 
-0.166666666666667011E-01 0.193887118630617986E-01 -0.125641385909668990E-02 -0.435069392936854012E-03 -0.118148619756053004E-02 0.194636395637321005E-01 -0.166666666666667011E-01 0.172965077542917012E-01 -0.927568247378030971E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.102046200973725997E-05 0.173051339160060005E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627737991E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.166666666666667011E-01 -0.435069391154679994E-03 0.193884418577966004E-01 -0.125613776930019998E-02 -0.435069392971057988E-03 -0.118121619345346995E-02 0.194633634742979005E-01 -0.139670761454477999E-03 -0.166666666666667011E-01 0.172964743993829009E-01 -0.239698237237728008E-07 -0.141639617486802999E-03 -0.121288294400935000E-05 0.178147057813331997E-01 -0.141639617487035994E-03 0.178147057813331997E-01 -0.121288294403079010E-05 -0.166666666666667011E-01 -0.139670761454519009E-03 -0.239698237241789995E-07 0.172964743993829009E-01 -0.166666666666667011E-01 -0.435069391188809974E-03 0.193887118630593006E-01 -0.125641385909560006E-02 -0.435069392936928985E-03 -0.118148619756054001E-02 0.194636395637317987E-01 -0.166666666666667011E-01 0.172965077542917012E-01 -0.927568247377395960E-07 0.000000000000000000E+00 -0.166666666666667011E-01 -0.102046200973731990E-05 0.173051339160060005E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.139674680709099010E-03 0.000000000000000000E+00 0.173041376249854009E-01 -0.235410513736555998E-07 -0.141584640574245006E-03 -0.117464392253488990E-05 0.178146677207011994E-01 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627737991E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.139674680709099010E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.173041376249854009E-01 -0.235410513736546998E-07 -0.141584640574244004E-03 -0.117464392253485009E-05 0.178146677207011994E-01 
-0.161601720479338990E-03 -0.166666666666667011E-01 0.172965076665808001E-01 -0.926702349005646988E-07 -0.162351552716187005E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.102033131897240007E-05 0.173051453909592000E-01 -0.166666666666667011E-01 -0.435069503154383993E-03 0.194633707007618000E-01 -0.118122593631263999E-02 -0.435069280971315012E-03 -0.125614175474171000E-02 0.193884463223708003E-01 -0.166666666666667011E-01 -0.139672289609427992E-03 0.172964739021653005E-01 -0.239862388766482005E-07 -0.141648440824267006E-03 -0.121369084068463994E-05 0.178147064711290000E-01 -0.141624407193420988E-03 0.178147048759245002E-01 -0.121150194851005999E-05 -0.166666666666667011E-01 -0.139668124261626994E-03 -0.239416874711979002E-07 0.172964738935757999E-01 -0.166666666666667011E-01 -0.435074183386577979E-03 0.194493808683343997E-01 -0.116766988693369996E-02 -0.435064600665905016E-03 -0.124201443215284004E-02 0.193746726050244011E-01 -0.166666666666667011E-01 0.173081303333391008E-01 -0.244868683301465988E-04 -0.166666666666667011E-01 -0.133924491176626011E-03 0.178635877950088992E-01 -0.166666666666667011E-01 0.173348654763634015E-01 -0.223062642005740000E-07 -0.104752868632071993E-05 0.178145411980307998E-01 -0.166666666666667011E-01 -0.435066580651087987E-03 0.193886968271455989E-01 -0.125646132097252000E-02 -0.435072203449548981E-03 -0.118157534474708009E-02 0.194637086880425011E-01 -0.139516419388416990E-03 -0.166666666666667011E-01 0.173041769139335999E-01 -0.219137423564159007E-07 0.000000000000000000E+00 -0.140700386591277008E-03 -0.109554334382343009E-05 0.178145628168012993E-01 -0.166666666666667011E-01 -0.139670785511788992E-03 0.172964393739205996E-01 -0.239723655938119001E-07 -0.141640116450570004E-03 -0.121308690248200008E-05 0.178147059824802015E-01 -0.166666666666667011E-01 -0.140353492284304998E-03 0.178144307839981988E-01 -0.934469196153570009E-06 -0.140353499090780005E-03 
-0.934493112001855014E-06 0.178144324348224990E-01 -0.435069470216686990E-03 0.194636389371891996E-01 -0.118148595888806001E-02 -0.166666666666667011E-01 -0.435069313909032019E-03 -0.125641246449570009E-02 0.193887089321175994E-01 -0.166666666666667011E-01 -0.435066224778556999E-03 0.193888655360086003E-01 -0.125664138810585991E-02 -0.435072559315323980E-03 -0.118175670116711000E-02 0.194638920667745997E-01 -0.139535529617076010E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172963867645380999E-01 -0.887106097806382040E-08 -0.140533144432323988E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.400546896298884985E-06 0.177498997117815013E-01 0.000000000000000000E+00 -0.140183011740981989E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.177495724693359017E-01 -0.500888730287148026E-07 -0.140419805775519006E-03 -0.518188128586851950E-06 0.178140142381220008E-01 -0.435066535168476983E-03 -0.166666666666667011E-01 0.193886214195925013E-01 -0.125637957080158998E-02 -0.435072248931340003E-03 -0.118149384028357995E-02 0.194636350622922998E-01 -0.166666666666667011E-01 -0.139671934380400999E-03 0.172964390135027998E-01 -0.239846931501482010E-07 -0.141646750301238991E-03 -0.121369351008339006E-05 0.178147064557130988E-01 -0.435070730673722014E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.194635424129478984E-01 -0.118148467355335004E-02 -0.435068053446324991E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.125637211589557001E-02 0.193885950416877000E-01 0.000000000000000000E+00 -0.435065887914215998E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193887904637096015E-01 -0.125657470314374992E-02 -0.435072896172526973E-03 -0.118169761602073009E-02 0.194638244685289008E-01 -0.139514756609392007E-03 -0.166666666666667011E-01 0.172964082847710011E-01 -0.223179439037086001E-07 -0.140739566181214998E-03 -0.113165464823919991E-05 0.178145987602886004E-01 -0.142980894581813998E-03 -0.166666666666667011E-01 0.173314539689373992E-01 
-0.109866042397868997E-06 -0.158534112254571006E-03 -0.501444838217852000E-05 0.178188430651418997E-01 -0.166666666666667011E-01 0.191833678439739010E-01 -0.116695961504079994E-02 0.000000000000000000E+00 -0.166666666666667011E-01 -0.115255809083636006E-02 0.193073016509602985E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.172969314939965015E-01 -0.239425929242464991E-07 -0.121051376580430009E-05 0.178147034232617012E-01 -0.166666666666667011E-01 -0.435670020554572023E-03 0.190410313364221991E-01 -0.779749213569466953E-03 -0.434467438812183976E-03 -0.823627788704149036E-03 0.189719905685246000E-01 -0.147270534866566013E-03 -0.166666666666667011E-01 0.174669457049126987E-01 -0.100890465185484999E-03 -0.564704633717408051E-03 -0.477373059999760001E-03 0.187551223307545999E-01 -0.139439401685823998E-03 0.178140971035927985E-01 -0.575958645398553022E-06 -0.166666666666667011E-01 -0.139258097457669987E-03 -0.129864673787176997E-07 0.173691952385951996E-01 -0.166666666666667011E-01 -0.437167482025401009E-03 0.195228613586728994E-01 -0.126248570169920009E-02 -0.432956846913321015E-03 -0.125491321979356992E-02 0.193932787767274011E-01 -0.166666666666667011E-01 0.172966662158898984E-01 -0.121413435136702005E-06 0.000000000000000000E+00 -0.166666666666667011E-01 -0.972291637013934920E-05 0.179398884476381013E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.350105792551318000E-03 0.000000000000000000E+00 0.179358412563318005E-01 -0.564926760193745028E-05 -0.144639853967615001E-03 -0.314581136545178986E-06 0.178138319445972984E-01 -0.166666666666667011E-01 0.193703856781149014E-01 -0.124844632131722007E-02 -0.118120734926823010E-02 0.194604805411918991E-01 -0.431301129654014016E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.192675040339660014E-01 -0.119991063440438991E-02 -0.438790369133317979E-03 -0.117513383061983992E-02 0.194438480799019008E-01 -0.161600618881618991E-03 -0.166666666666667011E-01 
0.000000000000000000E+00 0.172964964024612013E-01 -0.929759484961031952E-07 -0.162354606074604997E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.102674406185603999E-05 0.173051827906571011E-01 -0.166666666666667011E-01 -0.143972810900854002E-03 0.173505492137603010E-01 -0.481358872158661014E-04 -0.558385319861011051E-03 -0.438374229680275993E-03 0.187218708148952986E-01 -0.166666666666667011E-01 -0.139937822338344987E-03 0.174078896544059003E-01 -0.227983603063990998E-07 -0.141801732328176988E-03 -0.908683871102670049E-06 0.178144027016026996E-01 -0.143597578982917004E-03 0.178145584095829000E-01 -0.106316067196835009E-05 -0.166666666666667011E-01 -0.140417715032831007E-03 -0.268130638269802001E-07 0.174078940527996016E-01 -0.166666666666667011E-01 -0.434071259447730998E-03 0.193423072809523998E-01 -0.122826363249218997E-02 -0.436064294450999015E-03 -0.116769061111692994E-02 0.194456918207231988E-01 -0.166666666666667011E-01 0.172982030081412011E-01 -0.280949762784711011E-07 -0.166666666666667011E-01 -0.189517761832509992E-05 0.178271977135750007E-01 -0.166666666666667011E-01 0.178265008932085994E-01 -0.134657557022567991E-05 -0.925862840583118960E-06 0.178144080134048990E-01 -0.166666666666667011E-01 -0.434780916834727992E-03 0.193789511951828995E-01 -0.125220905631005992E-02 -0.435357601613535017E-03 -0.118147901946294995E-02 0.194624869766942994E-01 -0.140177592469087011E-03 -0.166666666666667011E-01 0.177492253272222983E-01 -0.496756672234455021E-07 0.000000000000000000E+00 -0.140414345842645999E-03 -0.517733155226927036E-06 0.178140139187708997E-01 -0.166666666666667011E-01 -0.435069391154579001E-03 0.193884418550169003E-01 -0.125613776648296005E-02 -0.435069392971158982E-03 -0.118121619070897998E-02 0.194633634714499008E-01 -0.166666666666667011E-01 -0.139670760166673013E-03 0.172964743997286001E-01 -0.239698098600330014E-07 -0.141639610050856989E-03 -0.121288226150826994E-05 0.178147057806271984E-01 
-0.141639611820286996E-03 0.178147057807866993E-01 -0.121288242338980006E-05 -0.166666666666667011E-01 -0.139670760473278990E-03 -0.239698131526809996E-07 0.172964743997317989E-01 -0.166666666666667011E-01 -0.435069393628180999E-03 0.194636361556436016E-01 -0.118148289887844995E-02 -0.435069390497556984E-03 -0.125641041453345998E-02 0.193887085193421987E-01 -0.139670572892313988E-03 -0.166666666666667011E-01 0.172964393935715992E-01 -0.239416569730802012E-07 0.000000000000000000E+00 -0.141638062327426994E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121132811726249994E-05 0.178146133160351008E-01 0.000000000000000000E+00 -0.435065005443445983E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885567700676983E-01 -0.125634384183789002E-02 -0.435073778621178993E-03 -0.118148063714991003E-02 0.194636163660859007E-01 -0.139670682267266008E-03 -0.166666666666667011E-01 0.172964394328304003E-01 -0.239712554208225998E-07 -0.141639519995227992E-03 -0.121303220633213010E-05 0.178147059371244997E-01 -0.166666666666667011E-01 -0.435069393052231988E-03 0.194636362649158005E-01 -0.118148299187565992E-02 -0.435069391073505994E-03 -0.125641054065752994E-02 0.193887086398788003E-01 -0.139670722089359006E-03 -0.166666666666667011E-01 0.172964393787637984E-01 -0.239432406825611007E-07 -0.141638922827194003E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121140585286092006E-05 0.178146133355806986E-01 0.000000000000000000E+00 -0.435065002826116019E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885565492021017E-01 -0.125634366816502010E-02 -0.435073781238435990E-03 -0.118148050457107996E-02 0.194636162212637985E-01 -0.139670533104681001E-03 -0.166666666666667011E-01 0.172964394486108987E-01 -0.239696530874210007E-07 -0.141638658891901002E-03 -0.121295339218169004E-05 0.178147058622277997E-01 -0.435090507624198979E-03 -0.166666666666667011E-01 0.194669892783514015E-01 -0.118530036438423006E-02 
-0.435048275086066013E-03 -0.125913754589597991E-02 0.193915247086616990E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172960984744740011E-01 -0.213609391462743998E-07 -0.166666666666667011E-01 -0.106367312934038010E-05 0.178056546914526007E-01 -0.166666666666667011E-01 0.173098191165480010E-01 -0.233466429807012998E-07 -0.116342624068299999E-05 0.178149495945450001E-01 -0.166666666666667011E-01 -0.435069781984934981E-03 0.194633224227003000E-01 -0.118118559577364002E-02 -0.435069002140319990E-03 -0.125608528610373000E-02 0.193883926903337007E-01 -0.139659157768774999E-03 -0.166666666666667011E-01 0.172964811071458988E-01 -0.238453432888061996E-07 -0.141572594197268990E-03 -0.120675029799499991E-05 0.178147008767080006E-01 -0.141538198842383991E-03 0.178146978502537016E-01 -0.120361768174549009E-05 -0.166666666666667011E-01 -0.139653197525385994E-03 -0.237816337684043011E-07 0.172964810481539000E-01 -0.166666666666667011E-01 -0.435101709399810996E-03 0.194659175728730989E-01 -0.118451613849551002E-02 -0.435037071408026995E-03 -0.125773914873593992E-02 0.193902105130663999E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172962783353253005E-01 -0.218342668577251013E-07 -0.166666666666667011E-01 -0.109032448304442004E-05 0.178073744716683996E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.434677413069656997E-03 0.193010409680305015E-01 -0.117438144585429007E-02 -0.435460867464844024E-03 -0.110662888767641991E-02 0.193848162965736988E-01 -0.166666666666667011E-01 0.173066829056793998E-01 -0.235026937711214014E-07 -0.117850668811869000E-05 0.178149746842341007E-01 -0.434571088711364992E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.192763238295290996E-01 -0.115086473839449998E-02 -0.435566874652637017E-03 -0.108492248963690989E-02 0.193620926343746995E-01 -0.139700128428728001E-03 -0.166666666666667011E-01 0.172964062536986006E-01 -0.947853442084885955E-08 -0.141287229661082013E-03 0.000000000000000000E+00 0.000000000000000000E+00 
-0.166666666666667011E-01 0.000000000000000000E+00 -0.427116700035002018E-06 0.177495998236214987E-01 -0.166666666666667011E-01 -0.139670101692640009E-03 0.172964394288557984E-01 -0.239650162888405991E-07 -0.141636168849436004E-03 -0.121272540531506999E-05 0.178147056226493003E-01 -0.166666666666667011E-01 -0.435070601601427007E-03 0.194632424803467992E-01 -0.118112723413059005E-02 -0.435068182519665007E-03 -0.125598135252497989E-02 0.193882951902035992E-01 -0.140350019130283996E-03 0.178144321328113989E-01 -0.934070655061730960E-06 -0.166666666666667011E-01 -0.140500665652292997E-03 -0.934901878808751052E-06 0.178144580209842994E-01 -0.166666666666667011E-01 -0.435068213377728019E-03 0.193838387473541998E-01 -0.125145538065708989E-02 -0.435070570743591027E-03 -0.117665454921121997E-02 0.194586920952139995E-01 -0.166666666666667011E-01 0.172970581688366015E-01 -0.800807256662095042E-18 -0.166666666666667011E-01 -0.116362368362492996E-16 0.173226956581742017E-01 -0.166666666666667011E-01 0.173227088035448999E-01 -0.227142423077076990E-07 -0.109342148655364995E-05 0.178145868761214014E-01 -0.166666666666667011E-01 -0.139356858353544992E-03 0.172964916979878001E-01 -0.206044330373597012E-07 -0.139859905327302997E-03 -0.104722219521747004E-05 0.178145504281299986E-01 -0.435504189131306018E-03 -0.166666666666667011E-01 0.194761567651129015E-01 -0.120248342376421996E-02 0.000000000000000000E+00 -0.434633990251299008E-03 -0.125640745877263009E-02 0.193901277969210986E-01 -0.166666666666667011E-01 -0.435069416312690977E-03 0.000000000000000000E+00 0.194633539559233987E-01 -0.118120745095251002E-02 -0.435069367813045000E-03 -0.125612755432325993E-02 0.193884319840324990E-01 -0.166666666666667011E-01 -0.139670622926057006E-03 0.172964757091009988E-01 -0.239682454273153985E-07 -0.141638804410660994E-03 -0.121280234601187995E-05 0.178147056852437993E-01 -0.141603983295218001E-03 0.178147026131165002E-01 -0.120962066737389997E-05 -0.166666666666667011E-01 
-0.139664589011853001E-03 -0.239035291403939993E-07 0.172964756492609006E-01 -0.166666666666667011E-01 -0.435069662231408008E-03 0.194635113006111984E-01 -0.118136680130286010E-02 -0.435069121894098010E-03 -0.125627780048987998E-02 0.193885794815036004E-01 -0.161604530780638008E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172965258603989015E-01 -0.937092380803772050E-07 -0.162357917896980013E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.103420292441458007E-05 0.173052119919553000E-01 -0.139467894615960005E-03 -0.166666666666667011E-01 0.173042004359115001E-01 -0.214254882847721013E-07 -0.140431934037818008E-03 -0.107186911344032006E-05 0.178145703665167007E-01 -0.435495844207702974E-03 -0.166666666666667011E-01 0.194039307261216001E-01 -0.113156930142864010E-02 -0.434642344857009977E-03 -0.118520221101701007E-02 0.193210744918359996E-01 -0.166666666666667011E-01 -0.435081000308743025E-03 0.194615626446394997E-01 -0.117973440054902992E-02 -0.435057783388624999E-03 -0.125399790053982996E-02 0.193864078136761001E-01 -0.160852239681984991E-03 -0.166666666666667011E-01 0.172967516936828013E-01 -0.175963431721074013E-07 -0.161650172616747013E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.203401553309515001E-06 0.173054257802153014E-01 -0.139669192621242992E-03 -0.166666666666667011E-01 0.173052370218566010E-01 -0.234215439038825986E-07 -0.141543308308260998E-03 -0.116637645863834998E-05 0.178146593404366997E-01 -0.435080679695976980E-03 -0.166666666666667011E-01 0.194640875795392999E-01 -0.118216803173680995E-02 -0.435058104025100973E-03 -0.125653847166064996E-02 0.193888504672653991E-01 -0.415397501230701984E-03 -0.166666666666667011E-01 0.188279009057511007E-01 -0.898333912356110006E-03 -0.453392657545613985E-03 -0.100006119280005999E-02 0.192410594709606017E-01 -0.166666666666667011E-01 0.173196351930705005E-01 -0.236100996593661996E-07 0.000000000000000000E+00 0.000000000000000000E+00 
-0.166666666666667011E-01 0.000000000000000000E+00 -0.123805471953726010E-05 0.178172764971170003E-01 -0.166666666666667011E-01 0.172958803103070990E-01 -0.240055324597324986E-07 -0.121597905415154999E-05 0.178147088629611004E-01 -0.166666666666667011E-01 -0.139360440123895005E-03 0.172964135806168012E-01 -0.206282988179274991E-07 -0.139848706255163002E-03 -0.104818854429194995E-05 0.178144673884889013E-01 -0.482623514256753016E-03 -0.166666666666667011E-01 0.184436867195339992E-01 -0.182833591562523009E-03 -0.374919021333009015E-03 -0.210075563706943995E-03 0.183321751743828996E-01 -0.143208310342222002E-03 0.178148148672475012E-01 -0.128486278788734005E-05 -0.166666666666667011E-01 -0.199920559280411991E-03 -0.199737151953955009E-05 0.178295061512010987E-01 -0.166666666666667011E-01 -0.433603816949246982E-03 0.193497617607279009E-01 -0.124503446125193997E-02 -0.436528005342376002E-03 -0.119072566655100006E-02 0.194675617176627014E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172951192319563997E-01 -0.214703229569797989E-07 -0.166666666666667011E-01 -0.107141837494050999E-05 0.178058595667948995E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.434603197477276975E-03 0.000000000000000000E+00 0.193058537144648011E-01 -0.118063268602804994E-02 -0.435534875277424982E-03 -0.111369468853339993E-02 0.193919603590869995E-01 -0.166666666666667011E-01 0.173056689021276987E-01 -0.235558337294687006E-07 -0.118363055424740992E-05 0.178149857852361003E-01 -0.435171466639532013E-03 -0.166666666666667011E-01 0.194709518243956006E-01 -0.119094077767639001E-02 -0.434967284384675014E-03 -0.126075351231549003E-02 0.193933220864733989E-01 -0.435279548009691016E-03 -0.166666666666667011E-01 0.194122108470178995E-01 -0.114336878037841995E-02 -0.434859088315804978E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.119657203671675997E-02 0.193454506408375984E-01 -0.166666666666667011E-01 -0.139670792135546010E-03 
0.172964364510711996E-01 -0.239726277026338997E-07 -0.141640184749274994E-03 -0.121310638409450998E-05 0.178147060027833010E-01 -0.166666666666667011E-01 -0.140348082594213009E-03 0.178144298829873010E-01 -0.933774300842211964E-06 -0.140348090712458993E-03 -0.933802946513419972E-06 0.178144318617154997E-01 -0.435069485964877014E-03 0.194636360495127995E-01 -0.118148312545114003E-02 -0.166666666666667011E-01 -0.435069298160832996E-03 -0.125640935270473000E-02 0.193887056176578997E-01 -0.166666666666667011E-01 -0.139670745411772993E-03 0.172964393771041017E-01 -0.239719351629878006E-07 -0.141639884975546990E-03 -0.121306573574195002E-05 0.178147059632727985E-01 -0.166666666666667011E-01 0.193887976520839000E-01 -0.125650164472002990E-02 -0.166666666666667011E-01 -0.118157208029668996E-02 0.194637273004341985E-01 -0.166666666666667011E-01 0.193887977203370011E-01 -0.125650167109275009E-02 -0.118157208884183003E-02 0.194637273863745991E-01 -0.166666666666667011E-01 -0.435069390354664992E-03 0.193887118325760006E-01 -0.125641384488361994E-02 -0.435069393771072991E-03 -0.118148619567291996E-02 0.194636395582745009E-01 -0.139674686773609991E-03 -0.166666666666667011E-01 0.173041492371790000E-01 -0.235404446357735999E-07 0.000000000000000000E+00 -0.141584559729122987E-03 -0.117458827916782993E-05 0.178146676653142995E-01 -0.166666666666667011E-01 -0.139670743898058004E-03 0.000000000000000000E+00 0.172964393844322988E-01 -0.239719184224240012E-07 -0.141639876162939988E-03 -0.121306489605736002E-05 0.178147059624353017E-01 -0.166666666666667011E-01 -0.140348087318788991E-03 0.178144318414940016E-01 -0.933801041250177032E-06 -0.140348087395501987E-03 -0.933801311940696952E-06 0.178144318601919996E-01 -0.435069392951675977E-03 0.194636364109944011E-01 -0.118148311441449989E-02 -0.166666666666667011E-01 -0.435069391174062005E-03 -0.125641070624533989E-02 0.193887087793657986E-01 -0.166666666666667011E-01 -0.435069391183394005E-03 0.193887815136928009E-01 
-0.125648507926827001E-02 -0.435069392942343978E-03 -0.118155584839527996E-02 0.194637107839485991E-01 -0.139700198051380994E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964151566069001E-01 -0.947849285071832001E-08 -0.141287467734219009E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.427106386139514992E-06 0.177495992957071996E-01 -0.140178145272992012E-03 -0.166666666666667011E-01 0.177492248109030991E-01 -0.496797001355079032E-07 -0.140415042956877005E-03 -0.517779462070214031E-06 0.178140139540239988E-01 -0.435069391423600026E-03 -0.166666666666667011E-01 0.193887083934986999E-01 -0.125641030816468009E-02 -0.435069392702138011E-03 -0.118148272213002004E-02 0.194636360081575990E-01 -0.166666666666667011E-01 -0.139670744267195992E-03 0.172964393844029993E-01 -0.239719223657696016E-07 -0.141639878293219989E-03 -0.121306508981309993E-05 0.178147059625506990E-01 -0.435069391344719005E-03 -0.166666666666667011E-01 0.193887083608434013E-01 -0.125641029540731006E-02 -0.435069392781018977E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118148271833985003E-02 0.194636359700672001E-01 -0.435069391241579015E-03 -0.166666666666667011E-01 0.193887087819448016E-01 -0.125641070755089994E-02 -0.435069392884159022E-03 -0.118148311473540010E-02 0.194636364115348993E-01 -0.139670743900726009E-03 -0.166666666666667011E-01 0.172964393843643011E-01 -0.239719184546282004E-07 -0.141639876179015995E-03 -0.121306489779411998E-05 0.178147059624346009E-01 -0.435069391188663010E-03 -0.166666666666667011E-01 0.193887118630544017E-01 -0.125641385909356002E-02 -0.435069392937075027E-03 -0.118148619756065993E-02 0.194636395637313997E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172965077542917012E-01 -0.927568247375718968E-07 -0.166666666666667011E-01 -0.102046200973759010E-05 0.173051339160060005E-01 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627735996E-02 -0.118157208932236990E-02 
0.194637273908664990E-01 -0.166666666666667011E-01 -0.435069391170066991E-03 0.193884418583467992E-01 -0.125613776955457008E-02 -0.435069392955670991E-03 -0.118121619348252006E-02 0.194633634743846991E-01 -0.139670761454596990E-03 -0.166666666666667011E-01 0.172964743993801011E-01 -0.239698237251752988E-07 -0.141639617487515998E-03 -0.121288294408737000E-05 0.178147057813331997E-01 -0.141639617489162006E-03 0.178147057813334009E-01 -0.121288294423791994E-05 -0.166666666666667011E-01 -0.139670761454882000E-03 -0.239698237282632997E-07 0.172964743993801011E-01 -0.166666666666667011E-01 -0.435069391188130992E-03 0.193887118630363987E-01 -0.125641385908596996E-02 -0.435069392937606990E-03 -0.118148619756086007E-02 0.194636395637294013E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172965077542917012E-01 -0.927568247372701942E-07 -0.166666666666667011E-01 -0.102046200973822008E-05 0.173051339160060005E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.139674680709100989E-03 0.173041376249854009E-01 -0.235410513736760014E-07 -0.141584640574256011E-03 -0.117464392253588008E-05 0.178146677207011994E-01 -0.166666666666667011E-01 0.193887977549706998E-01 -0.125650168627732006E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.139674680709099010E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.173041376249854009E-01 -0.235410513737710014E-07 -0.141584640574248991E-03 -0.117464392253522998E-05 0.178146677207011994E-01 -0.161601715880123993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172965076669070010E-01 -0.926692831361944028E-07 -0.162351548531821993E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.102032125850803006E-05 0.173051453880029987E-01 -0.166666666666667011E-01 -0.139670774820815007E-03 0.172964393819063991E-01 -0.239722506017439995E-07 -0.141640054670844003E-03 -0.121308123358901007E-05 0.178147059780696011E-01 -0.166666666666667011E-01 -0.435069333337653979E-03 0.193887254401867992E-01 
-0.125642901506816990E-02 -0.435069450788072999E-03 -0.118150189097284992E-02 0.194636551938420013E-01 -0.140349304427809005E-03 0.178144320166155994E-01 -0.933957824287751043E-06 -0.166666666666667011E-01 -0.140349299344085009E-03 -0.933939903071356043E-06 0.178144307788948991E-01 -0.166666666666667011E-01 -0.435068660852071026E-03 0.000000000000000000E+00 0.193848776728661985E-01 -0.125250844957367998E-02 -0.435070123271966982E-03 -0.117767775440582993E-02 0.194597409817202013E-01 -0.166666666666667011E-01 0.172969944929179005E-01 -0.779115314308188993E-07 -0.166666666666667011E-01 -0.755324938795983959E-05 0.179004996942120007E-01 -0.166666666666667011E-01 0.178976999748934017E-01 -0.387667548982099008E-05 -0.233628102656076998E-06 0.178135930268625009E-01 -0.166666666666667011E-01 -0.139686009466056008E-03 0.172964370591520006E-01 -0.241363103321880989E-07 -0.141728009792282988E-03 -0.122115128991173003E-05 0.178147135015222988E-01 -0.435039396203468979E-03 -0.166666666666667011E-01 0.193948601328561010E-01 -0.126336416897148991E-02 0.000000000000000000E+00 -0.435099385069850019E-03 -0.118873902080404996E-02 0.194708368876741998E-01 -0.166666666666667011E-01 -0.435069389255753991E-03 0.000000000000000000E+00 0.193884422504904987E-01 -0.125613821100519006E-02 -0.435069394869983992E-03 -0.118121665337716998E-02 0.194633639337775007E-01 -0.166666666666667011E-01 -0.139671380124086001E-03 0.172964743448792008E-01 -0.239764688219899014E-07 -0.141643188367961012E-03 -0.121320975071175002E-05 0.178147060922408014E-01 -0.141639844172828988E-03 0.178147058017989017E-01 -0.121290395199703009E-05 -0.166666666666667011E-01 -0.139670800627344013E-03 -0.239702482699698004E-07 0.172964743394126014E-01 -0.166666666666667011E-01 -0.435068235431639017E-03 0.193889053227464005E-01 -0.125663654535589997E-02 -0.435070548689851008E-03 -0.118172116532546008E-02 0.194638725033117015E-01 -0.161678337891334005E-03 -0.166666666666667011E-01 0.172964935301390017E-01 
-0.110214153364595997E-06 -0.162422470311079996E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.120622187414538000E-05 0.173052150067064016E-01 -0.139675704206640992E-03 -0.166666666666667011E-01 0.173040340098533016E-01 -0.235576501556051000E-07 -0.141591382929153007E-03 -0.117568258886253002E-05 0.178146687262423983E-01 -0.435067411943173014E-03 -0.166666666666667011E-01 0.193886625537611004E-01 -0.125640355917502997E-02 -0.435071372170113015E-03 -0.118150457751595010E-02 0.194636501950278991E-01 -0.166666666666667011E-01 -0.435069317670600008E-03 0.193887257892246985E-01 -0.125642966435953992E-02 -0.435069466455119977E-03 -0.118150274118994998E-02 0.194636560434848990E-01 -0.161601801933608996E-03 -0.166666666666667011E-01 0.172965054748796011E-01 -0.926502679423437959E-07 -0.162351306454973002E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.101988138442591992E-05 0.173051393629530988E-01 -0.139695639987958987E-03 -0.166666666666667011E-01 0.173041438368830985E-01 -0.237586532625412003E-07 -0.141701626146447995E-03 -0.118517343233528007E-05 0.178146776002246002E-01 -0.435031309395256976E-03 -0.166666666666667011E-01 0.193945788398414004E-01 -0.126324081468956006E-02 -0.435107470132009989E-03 -0.118873511389969004E-02 0.194708001084177990E-01 -0.435268203178918020E-03 -0.166666666666667011E-01 0.194803280841455984E-01 -0.120221471825937998E-02 -0.434870455402744990E-03 -0.126728100453280001E-02 0.194000950994309002E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172950455023187996E-01 -0.209695226068837989E-07 -0.166666666666667011E-01 -0.104309478839938997E-05 0.178040398395854990E-01 -0.166666666666667011E-01 0.173086281051332003E-01 -0.234055546297331013E-07 -0.116921175366159004E-05 0.178149613334783012E-01 -0.166666666666667011E-01 -0.141165531303230992E-03 0.172962108365398992E-01 -0.473445694663900975E-05 -0.544660594244081005E-03 -0.346550124345780994E-04 
0.183333673483142001E-01 -0.139754726222275990E-03 -0.166666666666667011E-01 0.176170084219247000E-01 -0.246149781579885013E-07 -0.140206540519596996E-03 -0.567788699151559999E-06 0.178140588497450007E-01 -0.165095492820345002E-03 0.178223326710730996E-01 -0.681196695085373968E-05 -0.166666666666667011E-01 -0.150450799069420005E-03 -0.294765827683784011E-06 0.176166680355617014E-01 -0.166666666666667011E-01 -0.421169440873449990E-03 0.189193905894529009E-01 -0.945436157372043042E-03 -0.448291280314557015E-03 -0.100548420996317994E-02 0.192537236307864998E-01 -0.166666666666667011E-01 0.173199119681975988E-01 -0.235984477489147988E-07 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.000000000000000000E+00 -0.123676178966295993E-05 0.178172740173286014E-01 -0.166666666666667011E-01 -0.435171428461128974E-03 0.194709389729267983E-01 -0.119092785894799004E-02 -0.434967322587667975E-03 -0.126074169224678996E-02 0.193933111644412001E-01 -0.166666666666667011E-01 0.172958818582512990E-01 -0.240054389194565998E-07 -0.121597096057541996E-05 0.178147088549035007E-01 -0.434490077563752020E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.192819871000558009E-01 -0.115810409202821997E-02 -0.435647599072524984E-03 -0.109302488139546004E-02 0.193702791629623006E-01 -0.435071227206677987E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.194623470628700994E-01 -0.118137412448114003E-02 -0.435067556908351005E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.125600599177529007E-02 0.193876331694533000E-01 -0.166666666666667011E-01 -0.139670743889624998E-03 0.172964393844356017E-01 -0.239719183317204011E-07 -0.141639876114236000E-03 -0.121306489159398005E-05 0.178147059624313014E-01 -0.166666666666667011E-01 -0.140348087313353994E-03 0.178144318418225998E-01 -0.933801044951077008E-06 -0.140348087388714990E-03 -0.933801310870910963E-06 0.178144318601911010E-01 -0.435069392936102985E-03 
0.194636364108025997E-01 -0.118148311410958993E-02 -0.166666666666667011E-01 -0.435069391189634998E-03 -0.125641070617519006E-02 0.193887087796365994E-01 -0.166666666666667011E-01 -0.139670745273937991E-03 0.172964393844580004E-01 -0.239719332022523000E-07 -0.141639884104326007E-03 -0.121306562274284990E-05 0.178147059631592990E-01 -0.166666666666667011E-01 0.193887976615549004E-01 -0.125650164862212989E-02 -0.166666666666667011E-01 -0.118157208096353003E-02 0.194637273070949017E-01 -0.166666666666667011E-01 0.193887977248977002E-01 -0.125650167309232009E-02 -0.118157208890513009E-02 0.194637273869662994E-01 -0.166666666666667011E-01 -0.435069391187089996E-03 0.193887118682170012E-01 -0.125641386440402997E-02 -0.435069392938647987E-03 -0.118148620277655990E-02 0.194636395690588986E-01 -0.139674686768426990E-03 -0.166666666666667011E-01 0.173041492303226997E-01 -0.235404449782952015E-07 0.000000000000000000E+00 -0.141584559767933007E-03 -0.117458831126650007E-05 0.178146676653491987E-01 -0.166666666666667011E-01 -0.139670743889684006E-03 0.000000000000000000E+00 0.172964393844356017E-01 -0.239719183323613007E-07 -0.141639876114578011E-03 -0.121306489162555998E-05 0.178147059624313986E-01 -0.166666666666667011E-01 -0.140348087304587000E-03 0.178144318418189986E-01 -0.933801043795399019E-06 -0.140348087379959000E-03 -0.933801309750847956E-06 0.178144318601899006E-01 -0.435069392936221976E-03 0.194636364108877989E-01 -0.118148311419265998E-02 -0.166666666666667011E-01 -0.435069391189516982E-03 -0.125641070625854005E-02 0.193887087797160983E-01 -0.166666666666667011E-01 -0.435069391192432022E-03 0.193887815134137984E-01 -0.125648507878967997E-02 -0.435069392933306015E-03 -0.118155584779333005E-02 0.194637107833882002E-01 -0.139700198049885988E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964151566860000E-01 -0.947849280219223922E-08 -0.141287467724896009E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.427106383695399994E-06 
0.177495992954452009E-01 -0.140178145690196993E-03 -0.166666666666667011E-01 0.177492248106932010E-01 -0.496797033573775009E-07 -0.140415043482606990E-03 -0.517779497585701998E-06 0.178140139541216984E-01 -0.435069391089031011E-03 -0.166666666666667011E-01 0.193887087977068009E-01 -0.125641072675559008E-02 -0.435069393036708002E-03 -0.118148313570800006E-02 0.194636364323594983E-01 -0.166666666666667011E-01 -0.139670745392212007E-03 0.172964393844482998E-01 -0.239719344734911991E-07 -0.141639884787109996E-03 -0.121306568527222010E-05 0.178147059632214992E-01 -0.435069390245067006E-03 -0.166666666666667011E-01 0.193887086999318010E-01 -0.125641068746266005E-02 -0.435069393880670977E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118148312670310991E-02 0.194636363420862997E-01 -0.435069390179441007E-03 -0.166666666666667011E-01 0.193887087448267985E-01 -0.125641069098422005E-02 -0.435069393946296975E-03 -0.118148311374637006E-02 0.194636364063980986E-01 -0.139670743899172997E-03 -0.166666666666667011E-01 0.172964393844271015E-01 -0.239719184348657009E-07 -0.141639876169436012E-03 -0.121306489668503007E-05 0.178147059624364015E-01 -0.435069391188929019E-03 -0.166666666666667011E-01 0.193887118630633008E-01 -0.125641385909733001E-02 -0.435069392936809993E-03 -0.118148619756053004E-02 0.194636395637322983E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172965077542917983E-01 -0.927568247378536014E-07 -0.166666666666667011E-01 -0.102046200973724006E-05 0.173051339160060005E-01 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627737991E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.166666666666667011E-01 -0.435069391171351012E-03 0.193884418583585988E-01 -0.125613776953759993E-02 -0.435069392954388001E-03 -0.118121619344631009E-02 0.194633634743581994E-01 -0.139670761454455990E-03 -0.166666666666667011E-01 0.172964743993848993E-01 -0.239698237233977009E-07 -0.141639617486649991E-03 
-0.121288294398642993E-05 0.178147057813330990E-01 -0.141639617486657987E-03 0.178147057813330990E-01 -0.121288294398714991E-05 -0.166666666666667011E-01 -0.139670761454456992E-03 -0.239698237233852998E-07 0.172964743993848993E-01 -0.166666666666667011E-01 -0.435069391188925984E-03 0.193887118630632002E-01 -0.125641385909728990E-02 -0.435069392936811999E-03 -0.118148619756053004E-02 0.194636395637322983E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172965077542917983E-01 -0.927568247378527941E-07 -0.166666666666667011E-01 -0.102046200973724006E-05 0.173051339160060005E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.139674680709099010E-03 0.173041376249854009E-01 -0.235410513736267014E-07 -0.141584640574244004E-03 -0.117464392253481007E-05 0.178146677207011994E-01 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627737991E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.139674680709099010E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.173041376249854009E-01 -0.235410513735998015E-07 -0.141584640574244004E-03 -0.117464392253481007E-05 0.178146677207011994E-01 -0.161601720324868993E-03 -0.166666666666667011E-01 0.172965076669642989E-01 -0.926701979568355970E-07 -0.162351552541144001E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.102033090018047992E-05 0.173051453908108985E-01 -0.166666666666667011E-01 -0.435069391175761004E-03 0.193884418585090999E-01 -0.125613776960263991E-02 -0.435069392949976978E-03 -0.118121619344674009E-02 0.194633634743759006E-01 -0.166666666666667011E-01 -0.139670761454455990E-03 0.172964743993851006E-01 -0.239698237233875994E-07 -0.141639617486650994E-03 -0.121288294398543001E-05 0.178147057813330990E-01 -0.141639617476079996E-03 0.178147057813322005E-01 -0.121288294301813998E-05 -0.166666666666667011E-01 -0.139670761452624013E-03 -0.239698237036860001E-07 0.172964743993851006E-01 -0.166666666666667011E-01 
-0.435069391192448990E-03 0.193887815134013014E-01 -0.125648507877692997E-02 -0.435069392933288993E-03 -0.118155584778073010E-02 0.194637107833748012E-01 -0.166666666666667011E-01 0.172964151566857016E-01 -0.947849086778118929E-08 -0.166666666666667011E-01 -0.427106298729459992E-06 0.177495992959284983E-01 -0.166666666666667011E-01 0.177492248112494991E-01 -0.496796927082655975E-07 -0.517779383578772961E-06 0.178140139540073003E-01 -0.166666666666667011E-01 -0.435069391188929019E-03 0.193887118675337006E-01 -0.125641386366846992E-02 -0.435069392936809993E-03 -0.118148620203093000E-02 0.194636395683033987E-01 -0.139674686766604988E-03 -0.166666666666667011E-01 0.173041492306688985E-01 -0.235404449394736996E-07 0.000000000000000000E+00 -0.141584559754340987E-03 -0.117458830859994993E-05 0.178146676653464994E-01 -0.166666666666667011E-01 -0.139670743889622992E-03 0.000000000000000000E+00 0.172964393844356017E-01 -0.239719183316415013E-07 -0.141639876114222989E-03 -0.121306489159275990E-05 0.178147059624313014E-01 -0.166666666666667011E-01 -0.140348087304557998E-03 0.178144318418215000E-01 -0.933801043825826983E-06 -0.140348087379918993E-03 -0.933801309744065975E-06 0.178144318601899006E-01 -0.435069392936097998E-03 0.194636364108022007E-01 -0.118148311410921003E-02 -0.166666666666667011E-01 -0.435069391189641015E-03 -0.125641070617487000E-02 0.193887087796364016E-01 -0.166666666666667011E-01 -0.435069391192461025E-03 0.193887815133983003E-01 -0.125648507877364007E-02 -0.435069392933277012E-03 -0.118155584777734001E-02 0.194637107833714011E-01 -0.139700198047769001E-03 -0.166666666666667011E-01 0.172964151566880990E-01 -0.947849280983375026E-08 -0.141287467715877995E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.427106384123253011E-06 0.177495992955506998E-01 -0.140178145551705995E-03 -0.166666666666667011E-01 0.177492248107965003E-01 -0.496797023203806995E-07 -0.140415043308048001E-03 -0.517779485956588989E-06 
0.178140139541092986E-01 -0.435069391188775008E-03 -0.166666666666667011E-01 0.193887087796628006E-01 -0.125641070621944004E-02 -0.435069392936964004E-03 -0.118148311416520994E-02 0.194636364108560014E-01 -0.166666666666667011E-01 -0.139670745283318996E-03 0.172964393844581010E-01 -0.239719333030583992E-07 -0.141639884158474994E-03 -0.121306562769789989E-05 0.178147059631642984E-01 -0.435069390314679995E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887086857719992E-01 -0.125641066837268002E-02 -0.435069393811057987E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118148310576349008E-02 0.194636363266590985E-01 0.000000000000000000E+00 -0.435069390314679995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193887087494363994E-01 -0.125641069296695000E-02 -0.435069393811057987E-03 -0.118148311374545001E-02 0.194636364069358005E-01 -0.139670743897207989E-03 -0.166666666666667011E-01 0.172964393844356988E-01 -0.239719184131230996E-07 -0.141639876158003994E-03 -0.121306489559900995E-05 0.178147059624353017E-01 -0.435069391188779996E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193887088940556986E-01 -0.125641082318989007E-02 -0.435069392936957987E-03 -0.118148322855787997E-02 0.194636365278264001E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964393409659987E-01 -0.239438466648548014E-07 -0.166666666666667011E-01 -0.121143526110419994E-05 0.178146131934183993E-01 -0.166666666666667011E-01 0.172964393844281007E-01 -0.239719133478075007E-07 -0.121306464654854992E-05 0.178147059621872987E-01 -0.166666666666667011E-01 -0.435069391175080993E-03 0.193884418584855008E-01 -0.125613776959208998E-02 -0.435069392950656990E-03 -0.118121619344611992E-02 0.194633634743727989E-01 -0.139670761454532995E-03 -0.166666666666667011E-01 0.172964743993852012E-01 -0.239698237241572016E-07 -0.141639617487093999E-03 -0.121288294402584004E-05 0.178147057813331997E-01 -0.141639617487093999E-03 0.178147057813331997E-01 -0.121288294402580997E-05 
-0.166666666666667011E-01 -0.139670761454532995E-03 -0.239698237241842008E-07 0.172964743993852012E-01 -0.166666666666667011E-01 -0.435069391188779996E-03 0.193887088940556986E-01 -0.125641082318989007E-02 -0.435069392936957987E-03 -0.118148322855787997E-02 0.194636365278264001E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964393409659987E-01 -0.239438466648548014E-07 -0.166666666666667011E-01 -0.121143526110419994E-05 0.178146131934183993E-01 0.000000000000000000E+00 -0.166666666666667011E-01 -0.435064994898318983E-03 0.000000000000000000E+00 0.193885567754814997E-01 -0.125634406113793993E-02 -0.435073789166010982E-03 -0.118148100321111005E-02 0.194636166968597014E-01 -0.166666666666667011E-01 0.172964393844281007E-01 -0.239719133478075007E-07 -0.121306464654854992E-05 0.178147059621872987E-01 -0.435064994898318983E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885567754814997E-01 -0.125634406113793993E-02 -0.435073789166010982E-03 -0.118148100321111005E-02 0.194636166968597014E-01 -0.161601720097545002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172965076670156016E-01 -0.926701503617335961E-07 -0.162351552330414998E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.102033039390967000E-05 0.173051453906522996E-01 -0.166666666666667011E-01 -0.435069886305821003E-03 0.194633772872509006E-01 -0.118124110147639996E-02 -0.435068897819141016E-03 -0.125613724075302000E-02 0.193884430666162004E-01 -0.166666666666667011E-01 -0.139670761026265005E-03 0.172964744480312016E-01 -0.239698159440932015E-07 -0.141639614515128010E-03 -0.121288245172214010E-05 0.178147057808154992E-01 -0.141639611356817998E-03 0.178147057804570984E-01 -0.121288216154248992E-05 -0.166666666666667011E-01 -0.139670760479100993E-03 -0.239698100456143004E-07 0.172964744480210014E-01 -0.166666666666667011E-01 -0.435069393124601020E-03 0.194636395974100984E-01 -0.118148625156355995E-02 -0.435069391001137993E-03 
-0.125641387127325998E-02 0.193887118958671993E-01 -0.166666666666667011E-01 0.172965077543725011E-01 -0.927568140336766949E-07 -0.166666666666667011E-01 -0.102046181269095990E-05 0.173051339147857995E-01 -0.166666666666667011E-01 0.173041376239574003E-01 -0.235410465544014012E-07 -0.117464369127346007E-05 0.178146677204709010E-01 -0.166666666666667011E-01 -0.435069392055963979E-03 0.193887089236092006E-01 -0.125641083591904006E-02 -0.435069392069774004E-03 -0.118148322857497003E-02 0.194636365312624987E-01 -0.435064994897326992E-03 -0.166666666666667011E-01 0.193885567754197990E-01 -0.125634406109512999E-02 0.000000000000000000E+00 -0.435073789167003027E-03 -0.118148100318356004E-02 0.194636166968270991E-01 -0.166666666666667011E-01 -0.139670744005780998E-03 0.000000000000000000E+00 0.172964393844374995E-01 -0.239719195794844008E-07 -0.141639876784672010E-03 -0.121306495294346998E-05 0.178147059624923984E-01 -0.166666666666667011E-01 -0.435069390971013978E-03 0.193887087720762998E-01 -0.125641070286031001E-02 -0.435069393154724005E-03 -0.118148311400433992E-02 0.194636364098218009E-01 -0.140348087384463996E-03 0.178144318601930994E-01 -0.933801313166044989E-06 -0.166666666666667011E-01 -0.140348087290235009E-03 -0.933800980670895958E-06 0.178144318372258983E-01 -0.166666666666667011E-01 -0.139670745283319999E-03 0.172964393844581010E-01 -0.239719333030395990E-07 -0.141639884158481011E-03 -0.121306562769829991E-05 0.178147059631642984E-01 -0.435069390314711004E-03 -0.166666666666667011E-01 0.193887086857633985E-01 -0.125641066836418010E-02 0.000000000000000000E+00 -0.435069393811026979E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118148310575508990E-02 0.194636363266489990E-01 -0.435069390314652023E-03 -0.166666666666667011E-01 0.193887087494357992E-01 -0.125641069296695998E-02 -0.435069393811086014E-03 -0.118148311374589995E-02 0.194636364069360017E-01 -0.139670743896895007E-03 -0.166666666666667011E-01 
0.172964393844356017E-01 -0.239719184097173985E-07 -0.141639876156198012E-03 -0.121306489543125995E-05 0.178147059624349999E-01 -0.166666666666667011E-01 -0.435069391192448990E-03 0.193887815133984009E-01 -0.125648507877395990E-02 -0.435069392933288993E-03 -0.118155584777784005E-02 0.194637107833718001E-01 -0.139700198047441003E-03 -0.166666666666667011E-01 0.172964151566879984E-01 -0.947849280858640987E-08 -0.141287467714373990E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.427106384068703984E-06 0.177495992955511994E-01 -0.140178145551758009E-03 -0.166666666666667011E-01 0.177492248107972012E-01 -0.496797023215807996E-07 -0.140415043308110993E-03 -0.517779485960914998E-06 0.178140139541092986E-01 -0.435069391188743024E-03 -0.166666666666667011E-01 0.193887087796531001E-01 -0.125641070621038001E-02 -0.435069392936995988E-03 -0.118148311415690994E-02 0.194636364108470017E-01 -0.139670745278628006E-03 -0.166666666666667011E-01 0.172964393844581010E-01 -0.239719332526419989E-07 -0.141639884131401002E-03 -0.121306562522040010E-05 0.178147059631618004E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887976612414012E-01 -0.125650164849316990E-02 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118157208094097992E-02 0.194637273068695993E-01 -0.166666666666667011E-01 0.172964393844281007E-01 -0.239719133478075007E-07 -0.121306464654854992E-05 0.178147059621872987E-01 -0.166666666666667011E-01 -0.139670744005789997E-03 0.172964393844378984E-01 -0.239719195795233015E-07 -0.141639876784720013E-03 -0.121306495294561001E-05 0.178147059624923984E-01 -0.435069390970999992E-03 -0.166666666666667011E-01 0.193887087720760014E-01 -0.125641070286029006E-02 -0.435069393154737991E-03 -0.118148311400450992E-02 0.194636364098219015E-01 -0.140348087384448004E-03 0.178144318601930994E-01 -0.933801313164241021E-06 -0.166666666666667011E-01 -0.140348087290217987E-03 -0.933800980664578045E-06 
0.178144318372256000E-01 -0.166666666666667011E-01 -0.435069391192457989E-03 0.193887815133986993E-01 -0.125648507877402994E-02 -0.435069392933279994E-03 -0.118155584777777001E-02 0.194637107833719007E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964151566877000E-01 -0.947849244191978996E-08 -0.166666666666667011E-01 -0.427106377284980004E-06 0.177495993062723005E-01 -0.166666666666667011E-01 -0.140178145569362010E-03 0.177492248215363989E-01 -0.496797136596620033E-07 -0.140415043288559006E-03 -0.517779487315692952E-06 0.178140139541107002E-01 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627737991E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.435069390319028026E-03 -0.166666666666667011E-01 0.193887087496057986E-01 -0.125641069305237993E-02 -0.435069393806710987E-03 -0.118148311376662990E-02 0.194636364069748005E-01 -0.139670783800487997E-03 -0.166666666666667011E-01 0.172964393410236991E-01 -0.239438517116492010E-07 -0.141639277731674996E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.121143550903037006E-05 0.178146131936444997E-01 -0.166666666666667011E-01 -0.435630521898627018E-03 0.193599440195765006E-01 -0.109077905787125992E-02 -0.434507214005837999E-03 -0.113826801288032008E-02 0.192764807435569993E-01 -0.166666666666667011E-01 -0.139536670047989993E-03 0.173126649368985017E-01 -0.217199091649137011E-07 -0.140761520198303991E-03 -0.106837084216960004E-05 0.178145668887513993E-01 -0.559739137191329981E-03 0.187342109887733005E-01 -0.453812468425848997E-03 -0.166666666666667011E-01 -0.144437867213283009E-03 -0.570571543797145032E-04 0.173662394826822990E-01 -0.166666666666667011E-01 -0.141554480273248993E-03 0.173902726296997004E-01 -0.612977187720352984E-05 -0.543269256103840045E-03 -0.155576690918531002E-04 0.183343607413550998E-01 -0.166666666666667011E-01 0.190435224881807010E-01 -0.109812319463910009E-02 -0.166666666666667011E-01 
-0.111910148431286992E-02 0.192002058717063995E-01 -0.166666666666667011E-01 0.191810336689041995E-01 -0.116112314020530008E-02 -0.117547931931102998E-02 0.194335300141195988E-01 -0.166666666666667011E-01 -0.435330234208519001E-03 0.194710205585382004E-01 -0.119426070267823997E-02 -0.434808332877938987E-03 -0.125626104852707998E-02 0.193894446024132014E-01 -0.140176366249097992E-03 -0.166666666666667011E-01 0.177492266081854984E-01 -0.496668683742545015E-07 0.000000000000000000E+00 -0.140412798776869000E-03 -0.517630021288783052E-06 0.178140137898258992E-01 -0.166666666666667011E-01 -0.435069391186561989E-03 0.000000000000000000E+00 0.193884418600598005E-01 -0.125613777099214995E-02 -0.435069392939175994E-03 -0.118121619465940005E-02 0.194633634756198985E-01 -0.166666666666667011E-01 -0.139670763690922996E-03 0.172964743992523005E-01 -0.239698477564660998E-07 -0.141639630395104001E-03 -0.121288412595031991E-05 0.178147057825145012E-01 -0.141639614589797013E-03 0.178147057809878995E-01 -0.121288267825071997E-05 -0.166666666666667011E-01 -0.139670760952332988E-03 -0.239698183155154984E-07 0.172964743992174985E-01 -0.166666666666667011E-01 -0.435069386976256979E-03 0.193887089986198996E-01 -0.125641101907938003E-02 -0.435069397149481980E-03 -0.118148348210879005E-02 0.194636367633517007E-01 -0.139670105391945004E-03 -0.166666666666667011E-01 0.172964392993067012E-01 -0.239365206872189997E-07 0.000000000000000000E+00 -0.141635362111391987E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121107470134998995E-05 0.178146126578259990E-01 0.000000000000000000E+00 -0.435064987171317002E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885562197462988E-01 -0.125634365104461995E-02 -0.435073796892796990E-03 -0.118148071382402999E-02 0.194636163662551993E-01 -0.139670481067569999E-03 -0.166666666666667011E-01 0.172964394190999009E-01 -0.239690924292986984E-07 -0.141638358730468997E-03 -0.121292585922250005E-05 0.178147058233791983E-01 
-0.166666666666667011E-01 -0.435069392178805006E-03 0.194636362774779012E-01 -0.118148298166642000E-02 -0.435069391946934006E-03 -0.125641057978546000E-02 0.193887086723010006E-01 -0.139670520926417992E-03 -0.166666666666667011E-01 0.172964393709954013E-01 -0.239410464318514008E-07 -0.141637761129918003E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121129767514461000E-05 0.178146131114593001E-01 0.000000000000000000E+00 -0.435064997641161015E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193885573025995017E-01 -0.125634454891261995E-02 -0.435073786423245982E-03 -0.118148144256120997E-02 0.194636171497057989E-01 -0.139670065452413992E-03 -0.166666666666667011E-01 0.172964393188763006E-01 -0.239646299428580996E-07 -0.141635960674409009E-03 -0.121270661878923007E-05 0.178147055903274988E-01 -0.434952598751025003E-03 -0.166666666666667011E-01 0.193531525592223995E-01 -0.122237998047711009E-02 -0.435186141503925985E-03 -0.114985437531923995E-02 0.194307215840422015E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.173112910791177986E-01 -0.253865277378783987E-04 -0.166666666666667011E-01 -0.138259363976029009E-03 0.178692219185750990E-01 -0.166666666666667011E-01 0.193634106903348008E-01 -0.124536794862644003E-02 -0.118108829657000993E-02 0.194595580860945985E-01 -0.166666666666667011E-01 -0.435070677644360025E-03 0.194631053235192995E-01 -0.118099346864504007E-02 -0.435068106476127980E-03 -0.125584229082854990E-02 0.193881574850998002E-01 -0.139730287993055990E-03 -0.166666666666667011E-01 0.172965129995646012E-01 -0.246134321794111988E-07 -0.141982715456250990E-03 -0.124443001707338994E-05 0.178147384368366989E-01 -0.141918001868253997E-03 0.178147321831826008E-01 -0.123839758836049003E-05 -0.166666666666667011E-01 -0.139719073147306011E-03 -0.244906574575426016E-07 0.172965128605519006E-01 -0.166666666666667011E-01 -0.434974886832897021E-03 0.193565238210357989E-01 
-0.122539181989999007E-02 -0.435163868604747015E-03 -0.115249078341532994E-02 0.194335198475173998E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.173108110594776994E-01 -0.252486229488234996E-04 -0.166666666666667011E-01 -0.137635746082908988E-03 0.178684051236191986E-01 -0.166666666666667011E-01 -0.140103968298790010E-03 0.173357792454707990E-01 -0.263227882029069994E-07 -0.143405882709704010E-03 -0.122767817853530995E-05 0.178147220760386006E-01 -0.166666666666667011E-01 0.193641798191387000E-01 -0.124570775063769998E-02 -0.118110177773261002E-02 0.194596602746646002E-01 -0.140117598274002998E-03 -0.166666666666667011E-01 0.173359378634117996E-01 -0.264605452288720992E-07 -0.143470065352011001E-03 -0.123349701123627008E-05 0.178147283869216008E-01 -0.139699930872859987E-03 -0.166666666666667011E-01 0.172964334628613985E-01 -0.947718670749511931E-08 -0.141286093437001994E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.427033784487718019E-06 0.177496010453740000E-01 -0.166666666666667011E-01 -0.435116790829975981E-03 0.188852131512914009E-01 -0.625688270833019968E-03 -0.435021984600298978E-03 -0.676914763138863006E-03 0.188324695108036989E-01 -0.166666666666667011E-01 -0.139445451109797009E-03 0.174062618540683015E-01 -0.190292153995733009E-07 -0.139963261540874002E-03 -0.765758082281971048E-06 0.178142630176260988E-01 -0.532898885828498952E-03 0.187657876969587016E-01 -0.512856505158025013E-03 -0.166666666666667011E-01 -0.274567397040450026E-03 -0.183874311667027007E-03 0.177330722022739987E-01 -0.166666666666667011E-01 -0.142934406988999004E-03 0.173279989518831008E-01 -0.109940052362783001E-06 -0.158611614292560006E-03 -0.505928282170762005E-05 0.178189142473604983E-01 -0.166666666666667011E-01 0.191837857135102006E-01 -0.116715933292911990E-02 -0.166666666666667011E-01 -0.115261550950918989E-02 0.193075009282458986E-01 -0.166666666666667011E-01 0.192715504188021997E-01 
-0.120392392247149001E-02 -0.117898318238484007E-02 0.194477863400541011E-01 -0.166666666666667011E-01 -0.435541715949862990E-03 0.194766610313947992E-01 -0.120362756621229003E-02 -0.434596353936575995E-03 -0.125584159877691992E-02 0.193897157775975992E-01 -0.435053772892238027E-03 -0.166666666666667011E-01 0.193916866829808993E-01 -0.125980517520797000E-02 -0.435085010459394024E-03 -0.118504095739005010E-02 0.194671524144910991E-01 -0.166666666666667011E-01 -0.139670776269950009E-03 0.172964393783541989E-01 -0.239722661868781990E-07 -0.141640063065594013E-03 -0.121308200605163008E-05 0.178147059781510013E-01 -0.166666666666667011E-01 -0.435069330991408006E-03 0.193887306635153987E-01 -0.125643445841789009E-02 -0.435069453134317996E-03 -0.118150726982896000E-02 0.194636605867372983E-01 -0.140346027430057995E-03 0.178144315994148983E-01 -0.933532497931826955E-06 -0.166666666666667011E-01 -0.140345973839123002E-03 -0.933513688386790009E-06 0.178144303098709995E-01 -0.166666666666667011E-01 -0.139688634719731995E-03 0.172964363725470992E-01 -0.241646447296168007E-07 -0.141743168415957993E-03 -0.122254521803203994E-05 0.178147146809073016E-01 -0.435059870297020007E-03 -0.166666666666667011E-01 0.193910369733737015E-01 -0.125956508747994997E-02 0.000000000000000000E+00 -0.435078913540760980E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118493531740769997E-02 0.194660938610356007E-01 0.000000000000000000E+00 -0.435045710174240005E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193875426795061985E-01 -0.125569402526346007E-02 -0.435093072169449998E-03 -0.118112078544045998E-02 0.194631751705481998E-01 -0.139419879842097997E-03 -0.166666666666667011E-01 0.172964832727322999E-01 -0.213161584778305005E-07 -0.140230214929005993E-03 -0.108242332731610002E-05 0.178145871202936984E-01 -0.166666666666667011E-01 -0.435070648727890980E-03 0.194635173256722006E-01 -0.118139703309657999E-02 -0.435068135392832026E-03 
-0.125625483745070996E-02 0.193885626932298015E-01 -0.139434374869739004E-03 -0.166666666666667011E-01 0.172964444175751006E-01 -0.849107878205794962E-08 -0.140068522154645993E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.383811097207312005E-06 0.177500579653601005E-01 -0.140208513794423006E-03 -0.166666666666667011E-01 0.177497183126398006E-01 -0.504600381181250993E-07 -0.140451251998147000E-03 -0.520324482149772044E-06 0.178140160927331009E-01 -0.435048849909143985E-03 -0.166666666666667011E-01 0.193915158406403991E-01 -0.125973026996141989E-02 -0.435089932877478006E-03 -0.118503856970659008E-02 0.194671300235494012E-01 -0.435388199753579007E-03 -0.166666666666667011E-01 0.194580790950915003E-01 -0.118263497490555007E-02 -0.434750258397953015E-03 -0.124178531210186001E-02 0.193754724111637015E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.173081480358036992E-01 -0.244895773689184990E-04 -0.166666666666667011E-01 -0.133922698934493990E-03 0.178635891343980995E-01 -0.166666666666667011E-01 0.193849898137051016E-01 -0.125483132565947994E-02 -0.118151827368068998E-02 0.194632328928430985E-01 -0.166666666666667011E-01 -0.142602728611135999E-03 0.172960572372906005E-01 -0.117224517748023995E-06 -0.158587369045146999E-03 -0.567482454000282988E-05 0.178189579824735989E-01 -0.139720580884206998E-03 -0.166666666666667011E-01 0.177036794314488002E-01 -0.306504126161977009E-07 -0.139932852514181998E-03 -0.499375406900951968E-06 0.178139944442497998E-01 -0.546160778925739950E-03 0.184421024561668007E-01 -0.894165394995071063E-04 -0.166666666666667011E-01 -0.147442848397543992E-03 -0.240246210218879017E-04 0.177137736588962001E-01 -0.166666666666667011E-01 -0.400413710403862977E-03 0.186079235147511013E-01 -0.750380380436990973E-03 -0.465646679664696004E-03 -0.925379800643594003E-03 0.191586075383834986E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.177029162142695005E-01 
-0.193968097316926012E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.245143979550076002E-03 0.179475102930650998E-01 -0.166666666666667011E-01 -0.140401212229714989E-03 0.173460971401947009E-01 -0.290712390926593986E-07 -0.144639789824670004E-03 -0.132223121416671002E-05 0.178148169771682983E-01 -0.166666666666667011E-01 0.193485878399233015E-01 -0.123880031430750989E-02 -0.118081271609363995E-02 0.194575794108941998E-01 -0.139755721134129007E-03 -0.166666666666667011E-01 0.173348680988957005E-01 -0.228988835753830005E-07 -0.141707074016601988E-03 -0.107451126571115990E-05 0.178145682418863992E-01 -0.139419950330182008E-03 -0.166666666666667011E-01 0.172964916756991986E-01 -0.212265241962166009E-07 -0.140190964959546998E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.107696768439272990E-05 0.178142558899423015E-01 -0.166666666666667011E-01 -0.435630511714110973E-03 0.193599454053489002E-01 -0.109078026057308000E-02 -0.434507224229087005E-03 -0.113826963448308990E-02 0.192764822629944987E-01 -0.166666666666667011E-01 -0.139539348987166009E-03 0.173126647078671002E-01 -0.217461924902823005E-07 -0.140774574657894001E-03 -0.106961747895517997E-05 0.178145678787864983E-01 -0.559739151621855982E-03 0.187342113122531996E-01 -0.453812905743477017E-03 -0.166666666666667011E-01 -0.144437865847829008E-03 -0.570572508916922003E-04 0.173662393449596011E-01 -0.166666666666667011E-01 -0.141554479709531003E-03 0.173902724631262995E-01 -0.612976885063492026E-05 -0.543269258690614047E-03 -0.155576910986749004E-04 0.183343607371364015E-01 -0.166666666666667011E-01 0.190435223900498989E-01 -0.109812314311903995E-02 -0.166666666666667011E-01 -0.111910146500585994E-02 0.192002058225646012E-01 -0.166666666666667011E-01 0.191810336309519991E-01 -0.116112312176440999E-02 -0.117547931761240003E-02 0.194335300082579994E-01 -0.166666666666667011E-01 -0.435325182482829002E-03 0.194712654108410016E-01 -0.119439354715173991E-02 
-0.434813392979988976E-03 -0.125665050313407000E-02 0.193897929840027988E-01 -0.140180749732543006E-03 -0.166666666666667011E-01 0.177492505912984003E-01 -0.497281655052260991E-07 0.000000000000000000E+00 -0.140418216862212009E-03 -0.517996101551560007E-06 0.178140139712071009E-01 -0.166666666666667011E-01 -0.435069320552657990E-03 0.193884456575911003E-01 -0.125614320010757996E-02 -0.435069463573064001E-03 -0.118122256466309992E-02 0.194633694941631005E-01 -0.166666666666667011E-01 -0.139670893318960006E-03 0.172964735659094995E-01 -0.239712911883834992E-07 -0.141640386974580993E-03 -0.121295694635543998E-05 0.178147058443772002E-01 -0.141657679701159996E-03 0.178147071052390006E-01 -0.121453479340950007E-05 -0.166666666666667011E-01 -0.139673890220687005E-03 -0.240034008201301999E-07 0.172964735798710016E-01 -0.166666666666667011E-01 -0.435069148260108026E-03 0.193887190916836004E-01 -0.125642659983513007E-02 -0.435069635865442011E-03 -0.118150231844085991E-02 0.194636542898556017E-01 -0.139671819173706988E-03 -0.166666666666667011E-01 0.172964367943914989E-01 -0.239065653681595988E-07 0.000000000000000000E+00 -0.141643861658325012E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.120918365724227991E-05 0.178144555196989990E-01 0.000000000000000000E+00 -0.435057571499996014E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193887668592458984E-01 -0.125672668568808997E-02 -0.435081212181971026E-03 -0.118196881219207990E-02 0.194640540662835010E-01 -0.139678894290856009E-03 -0.166666666666667011E-01 0.172963729635307005E-01 -0.240483845199339011E-07 -0.141687157028328993E-03 -0.121683863414542000E-05 0.178146601266745003E-01 -0.166666666666667011E-01 -0.435063686986125991E-03 0.193888428213916005E-01 -0.125667757246277997E-02 -0.435075097036248006E-03 -0.118183173664951000E-02 0.194639441484454993E-01 -0.139679133300664996E-03 -0.166666666666667011E-01 0.172963909863181990E-01 -0.238879039025341995E-07 
-0.141683556731170989E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.120749743554915009E-05 0.178141338836764994E-01 0.000000000000000000E+00 -0.435044658394895018E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193878886191347008E-01 -0.125607319657829999E-02 -0.435094123787185003E-03 -0.118150841007670995E-02 0.194635600909605008E-01 -0.139671709913231998E-03 -0.166666666666667011E-01 0.172964345130826001E-01 -0.239825539017364991E-07 -0.141645500444457010E-03 -0.121359829368120991E-05 0.178147062946855013E-01 -0.434952586391279994E-03 -0.166666666666667011E-01 0.193531521576786013E-01 -0.122237980995592004E-02 -0.435186153854376021E-03 -0.114985437868289999E-02 0.194307215405399997E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.173112910777010014E-01 -0.253865275619669011E-04 -0.166666666666667011E-01 -0.138259364869725991E-03 0.178692219193366010E-01 -0.166666666666667011E-01 0.193634106886295017E-01 -0.124536794787297005E-02 -0.118108829653989990E-02 0.194595580858667010E-01 -0.166666666666667011E-01 -0.435067679897617021E-03 0.193880971906136984E-01 -0.125581995532966003E-02 -0.435071104218809021E-03 -0.118092995363368010E-02 0.194630638101403984E-01 -0.139730294231680001E-03 -0.166666666666667011E-01 0.172965125754464995E-01 -0.246135293977140997E-07 -0.141982756437381995E-03 -0.124443577379881005E-05 0.178147384420490988E-01 -0.141918111126178997E-03 0.178147321937730009E-01 -0.123840965646412996E-05 -0.166666666666667011E-01 -0.139719091240698002E-03 -0.244908832673502001E-07 0.172965124365098005E-01 -0.166666666666667011E-01 -0.434974852046012982E-03 0.193565226857273995E-01 -0.122539133752111993E-02 -0.435163903370489979E-03 -0.115249079254224011E-02 0.194335197239201007E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.173108110555817013E-01 -0.252486224725561010E-04 -0.166666666666667011E-01 -0.137635748516305002E-03 0.178684051256846992E-01 -0.166666666666667011E-01 
-0.140103968384359013E-03 0.173357792453362990E-01 -0.263227891208892990E-07 -0.143405883131451987E-03 -0.122767822040640990E-05 0.178147220760749986E-01 -0.166666666666667011E-01 0.193641798143363991E-01 -0.124570774851647990E-02 -0.118110177764830008E-02 0.194596602740228983E-01 -0.140117598304397007E-03 -0.166666666666667011E-01 0.173359378633641988E-01 -0.264605455567292014E-07 -0.143470065501732002E-03 -0.123349702618485005E-05 0.178147283869347015E-01 -0.139708750660181013E-03 -0.166666666666667011E-01 0.172963799080418992E-01 -0.951229877766877080E-08 -0.141326952140999990E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.428620398176468988E-06 0.177495879055481996E-01 -0.166666666666667011E-01 -0.435139436933837007E-03 0.188854634743449989E-01 -0.626119914025915028E-03 -0.434999328204409004E-03 -0.676888990402598964E-03 0.188324542389332987E-01 -0.166666666666667011E-01 -0.139445446199876991E-03 0.174062644632392988E-01 -0.190291660309238009E-07 -0.139963236529218007E-03 -0.765751518464875966E-06 0.178142630110634005E-01 -0.532897137108844952E-03 0.187657880254245013E-01 -0.512857774623407003E-03 -0.166666666666667011E-01 -0.274572252963939997E-03 -0.183877623767562004E-03 0.177330837430943015E-01 -0.166666666666667011E-01 -0.142934406314033008E-03 0.173279990759568001E-01 -0.109939898040709003E-06 -0.158611602798506006E-03 -0.505927466613655039E-05 0.178189142396055003E-01 -0.166666666666667011E-01 0.191837858462441009E-01 -0.116715939594670996E-02 -0.166666666666667011E-01 -0.115261553028868000E-02 0.193075010029012985E-01 -0.166666666666667011E-01 0.192715504745154011E-01 -0.120392394815805009E-02 -0.117898318409425003E-02 0.194477863480717016E-01 -0.166666666666667011E-01 -0.435541725864183982E-03 0.194766613131175000E-01 -0.120362800929753994E-02 -0.434596343992153973E-03 -0.125584159683252996E-02 0.193897158068569997E-01 -0.435053772712346986E-03 -0.166666666666667011E-01 
0.193916866769025011E-01 -0.125980517264371998E-02 -0.435085010639267014E-03 -0.118504095747396995E-02 0.194671524138403003E-01 -0.166666666666667011E-01 -0.139670776270514011E-03 0.172964393783540983E-01 -0.239722661929319005E-07 -0.141640063068849004E-03 -0.121308200634946994E-05 0.178147059781512997E-01 -0.166666666666667011E-01 -0.435069330990369015E-03 0.193887306635063990E-01 -0.125643445843000995E-02 -0.435069453135356987E-03 -0.118150726985585992E-02 0.194636605867599989E-01 -0.140346027478982998E-03 0.178144315994209004E-01 -0.933532505083072018E-06 -0.166666666666667011E-01 -0.140345973895093990E-03 -0.933513695241501967E-06 0.178144303098549985E-01 -0.166666666666667011E-01 -0.139688634737237009E-03 0.172964363725436991E-01 -0.241646449186064015E-07 -0.141743168517025991E-03 -0.122254522732879002E-05 0.178147146809153993E-01 -0.435059870304747008E-03 -0.166666666666667011E-01 0.193910369723532990E-01 -0.125956508720873007E-02 0.000000000000000000E+00 -0.435078913533033980E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118493531739844002E-02 0.194660938594058003E-01 0.000000000000000000E+00 -0.435045710134425011E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.193875426850105004E-01 -0.125569403179517989E-02 -0.435093072209260005E-03 -0.118112079243948005E-02 0.194631751773721995E-01 -0.139419883587548995E-03 -0.166666666666667011E-01 0.172964832718376996E-01 -0.213161991804850005E-07 -0.140230234433833000E-03 -0.108242532544913993E-05 0.178145871219505987E-01 -0.166666666666667011E-01 -0.435070648695012982E-03 0.194635173293556986E-01 -0.118139703587940995E-02 -0.435068135425710023E-03 -0.125625484210198000E-02 0.193885626975772996E-01 -0.139434378884568000E-03 -0.166666666666667011E-01 0.172964444169705009E-01 -0.849109393347174922E-08 -0.140068540591421997E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.383811762342743974E-06 0.177500579574621994E-01 
-0.140208513821624988E-03 -0.166666666666667011E-01 0.177497183043298992E-01 -0.504600292032754029E-07 -0.140451252065581990E-03 -0.520324484157912038E-06 0.178140160927338989E-01 -0.435048849877068026E-03 -0.166666666666667011E-01 0.193915158396935003E-01 -0.125973026964695009E-02 -0.435089932909550007E-03 -0.118503856986224010E-02 0.194671300235720011E-01 -0.435388199753534012E-03 -0.166666666666667011E-01 0.194580790950902999E-01 -0.118263497490352998E-02 -0.434750258397998009E-03 -0.124178531210191010E-02 0.193754724111636009E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.173081480358036992E-01 -0.244895773689180992E-04 -0.166666666666667011E-01 -0.133922698934496999E-03 0.178635891343980995E-01 -0.166666666666667011E-01 0.193849898137051016E-01 -0.125483132565947994E-02 -0.118151827368068998E-02 0.194632328928430985E-01 -0.166666666666667011E-01 -0.142602728611646007E-03 0.172960572372683995E-01 -0.117224517813049999E-06 -0.158587369049941992E-03 -0.567482454330607033E-05 0.178189579824768012E-01 -0.139720580884228004E-03 -0.166666666666667011E-01 0.177036794314212008E-01 -0.306504126126849030E-07 -0.139932852514272990E-03 -0.499375406920812032E-06 0.178139944442497998E-01 -0.546160778925799039E-03 0.184421024561692987E-01 -0.894165395041286943E-04 -0.166666666666667011E-01 -0.147442848396934996E-03 -0.240246210175965007E-04 0.177137736588654990E-01 -0.166666666666667011E-01 -0.400413710403472989E-03 0.186079235147468998E-01 -0.750380380434690947E-03 -0.465646679664998009E-03 -0.925379800643236976E-03 0.191586075383828984E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.177029162142689003E-01 -0.193968097316642006E-03 -0.166666666666667011E-01 0.000000000000000000E+00 -0.245143979549885995E-03 0.179475102930649992E-01 -0.166666666666667011E-01 -0.140401212229714989E-03 0.173460971401947009E-01 -0.290712390925843005E-07 -0.144639789824673013E-03 -0.132223121416698996E-05 0.178148169771682983E-01 -0.166666666666667011E-01 
0.193485878399233015E-01 -0.123880031430749992E-02 -0.118081271609363995E-02 0.194575794108941998E-01 -0.139755721134129007E-03 -0.166666666666667011E-01 0.173348680988957005E-01 -0.228988835753840990E-07 -0.141707074016602990E-03 -0.107451126571121009E-05 0.178145682418863992E-01 -0.139419954077844995E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964916755905009E-01 -0.212265636585413010E-07 -0.140190986545845990E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.107696962013746004E-05 0.178142558877735016E-01 -0.166666666666667011E-01 -0.435069846225575999E-03 0.194634066063343984E-01 -0.118126932115139994E-02 -0.435068937899507017E-03 -0.125616762178684996E-02 0.193884733114783013E-01 -0.166666666666667011E-01 -0.139670406363162002E-03 0.172964705688706984E-01 -0.239661812368390999E-07 -0.141637605266820997E-03 -0.121271180256684004E-05 0.178147053610641995E-01 -0.142072630916558992E-03 0.178147467538722001E-01 -0.125303449493682997E-05 -0.166666666666667011E-01 -0.139745783560114997E-03 -0.247866380946762004E-07 0.172964714736013984E-01 -0.166666666666667011E-01 -0.434919264262457973E-03 0.193344334821136006E-01 -0.120384249982520997E-02 -0.435219446910729000E-03 -0.113217480938375002E-02 0.194124575348414985E-01 -0.166666666666667011E-01 0.173153477755207987E-01 -0.270609262604330985E-04 -0.166666666666667011E-01 -0.148526207309566006E-03 0.178819709504901003E-01 -0.166666666666667011E-01 0.173381030673696000E-01 -0.222893784193113984E-07 -0.104900286329099006E-05 0.178148086104071984E-01 -0.166666666666667011E-01 -0.435070197675724009E-03 0.194629909526723016E-01 -0.118086398896642989E-02 -0.435068586447951984E-03 -0.125574648914054000E-02 0.193880551117544993E-01 -0.435351553302722974E-03 -0.166666666666667011E-01 0.194221659534600001E-01 -0.114701564680454995E-02 -0.434786972534015011E-03 -0.120710615390273008E-02 0.193420430171151007E-01 -0.166666666666667011E-01 -0.139670121825768003E-03 0.172964394653271000E-01 
-0.239652296206953015E-07 -0.141636284668589994E-03 -0.121273580815533010E-05 0.178147056312705013E-01 -0.166666666666667011E-01 -0.435070564583170979E-03 0.194632895210796984E-01 -0.118117290840524997E-02 -0.435068219538200975E-03 -0.125602935372259991E-02 0.193883427513572015E-01 -0.140356103029070988E-03 0.178144326340144016E-01 -0.934844351950365017E-06 -0.166666666666667011E-01 -0.140500373954539995E-03 -0.935645690836052991E-06 0.178144577216279992E-01 -0.166666666666667011E-01 -0.139419788136554014E-03 0.172964863338696991E-01 -0.213147023989074007E-07 -0.140223522691785012E-03 -0.108230807367247995E-05 0.178145859435818003E-01 -0.435214369892329993E-03 -0.166666666666667011E-01 0.194296915933112983E-01 -0.115819918280740002E-02 0.000000000000000000E+00 -0.434924344707075003E-03 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.121631545945458997E-02 0.193618156735865000E-01 0.000000000000000000E+00 -0.435418791966529978E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.194737697407992016E-01 -0.119862135208015992E-02 -0.434719602197164011E-03 -0.125646773106410990E-02 0.193899154748590000E-01 -0.139674312472573998E-03 -0.166666666666667011E-01 0.172964142795966995E-01 -0.239964322444068015E-07 -0.141660280806240994E-03 -0.121419135139913995E-05 0.178146576430187990E-01 -0.166666666666667011E-01 -0.435064741241858997E-03 0.193888829829566983E-01 -0.125669444198150005E-02 -0.435074042815188978E-03 -0.118183195684604006E-02 0.194639537546182004E-01 -0.139703753692154993E-03 -0.166666666666667011E-01 0.172963763508955995E-01 -0.954521092502657923E-08 -0.141306390167940008E-03 0.000000000000000000E+00 0.000000000000000000E+00 0.000000000000000000E+00 -0.166666666666667011E-01 -0.430379165314042026E-06 0.177499308188320999E-01 -0.139758462452603012E-03 -0.166666666666667011E-01 0.177495927166552991E-01 -0.464544378539779993E-07 -0.139910115331338996E-03 -0.481441843847050964E-06 0.178139831899020995E-01 
-0.435407966296089978E-03 -0.166666666666667011E-01 0.194236847721564986E-01 -0.114950911636884008E-02 -0.434730445651113973E-03 -0.120708640519090991E-02 0.193421924466846001E-01 -0.416148250027581024E-03 -0.166666666666667011E-01 0.185549551170714998E-01 -0.503294505756778955E-03 -0.452623601339539015E-03 -0.518655138222936054E-03 0.187784353169859995E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.182536262402052986E-01 -0.562462921602506017E-03 -0.166666666666667011E-01 -0.776619709920083967E-03 0.186715804513362012E-01 -0.166666666666667011E-01 0.173100319756059016E-01 -0.233382192352905001E-07 -0.116274420326912994E-05 0.178149543965509007E-01 -0.166666666666667011E-01 -0.143574443571915002E-03 0.173387404346211985E-01 -0.394234709446026970E-04 -0.556824893407507947E-03 -0.414251929427215991E-03 0.187007563880755012E-01 -0.143222946652751988E-03 -0.166666666666667011E-01 0.174281634389273007E-01 -0.707336094321511021E-07 -0.153074744284785989E-03 -0.259053351753128992E-05 0.178159320930474999E-01 -0.143343235945712010E-03 0.178139865856733991E-01 -0.995973199358266933E-06 -0.166666666666667011E-01 -0.140432036984107007E-03 -0.263668001789773996E-07 0.174281413140941997E-01 -0.166666666666667011E-01 -0.434113176607574020E-03 0.193474145478290012E-01 -0.123277679909749010E-02 -0.436022648061583991E-03 -0.117159827098749008E-02 0.194498157758568983E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172976663726301001E-01 -0.749716013957179010E-08 -0.166666666666667011E-01 0.000000000000000000E+00 -0.328919335683087000E-06 0.177355871461097984E-01 -0.166666666666667011E-01 -0.140992307333851993E-03 0.177352147833274992E-01 -0.465109349848994022E-07 -0.141543817822521994E-03 -0.593601105003849030E-06 0.178140918400187011E-01 -0.166666666666667011E-01 0.193681748787879991E-01 -0.124747135029532009E-02 -0.118117041740436004E-02 0.194601894329670989E-01 -0.402827943802412983E-03 -0.166666666666667011E-01 0.186951349558098986E-01 
-0.851875272769232967E-03 -0.463811391972161985E-03 -0.104547015467971994E-02 0.192724136708169007E-01 -0.139676914109847992E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172965234506337016E-01 -0.258479543060504998E-07 -0.166751637452680000E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.152816050945488003E-05 0.178205642139422984E-01 -0.166666666666667011E-01 -0.435069391172627010E-03 0.000000000000000000E+00 0.193884418587671990E-01 -0.125613776993248002E-02 -0.435069392953110973E-03 -0.118121619381530009E-02 0.194633634747354983E-01 -0.166666666666667011E-01 -0.139670761673773989E-03 0.172964743993378987E-01 -0.239698260758715003E-07 -0.141639618752670998E-03 -0.121288305970079009E-05 0.178147057814278011E-01 -0.141639617499855004E-03 0.178147057813345007E-01 -0.121288294541192010E-05 -0.166666666666667011E-01 -0.139670761456658004E-03 -0.239698237501548993E-07 0.172964743993366983E-01 -0.166666666666667011E-01 -0.435069391184759991E-03 0.193887118629383001E-01 -0.125641385905375007E-02 -0.435069392940977992E-03 -0.118148619757772006E-02 0.194636395637332003E-01 -0.166666666666667011E-01 0.172965077542778997E-01 -0.927568134856068057E-07 -0.166666666666667011E-01 -0.102046181376419003E-05 0.173051339147867016E-01 -0.166666666666667011E-01 0.173041376239571991E-01 -0.235410465543548011E-07 -0.117464369127414998E-05 0.178146677204709010E-01 -0.166666666666667011E-01 -0.435069390787829998E-03 0.193887086342375004E-01 -0.125641056657932004E-02 -0.435069393337907985E-03 -0.118148298373717004E-02 0.194636362741715009E-01 -0.435064997112617009E-03 -0.166666666666667011E-01 0.193885568522766984E-01 -0.125634409504329003E-02 0.000000000000000000E+00 -0.435073786951775026E-03 -0.118148100464545011E-02 0.194636167070038994E-01 -0.166666666666667011E-01 -0.139670744016726994E-03 0.000000000000000000E+00 0.172964393844361013E-01 -0.239719196971529010E-07 -0.141639876847867010E-03 -0.121306495872900994E-05 0.178147059624979009E-01 
-0.166666666666667011E-01 -0.435069390950542994E-03 0.193887087721348016E-01 -0.125641070333806998E-02 -0.435069393175194989E-03 -0.118148311476701999E-02 0.194636364105120994E-01 -0.140348088180879989E-03 0.178144318602812997E-01 -0.933801415168445956E-06 -0.166666666666667011E-01 -0.140348088084880012E-03 -0.933801076426891949E-06 0.178144318368827007E-01 -0.166666666666667011E-01 -0.139670745303743007E-03 0.172964393843559987E-01 -0.239719335227778987E-07 -0.141639884277225002E-03 -0.121306563867630006E-05 0.178147059631548997E-01 -0.435069390608568011E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887086845648017E-01 -0.125641066869512999E-02 -0.435069393517170026E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118148310483521006E-02 0.194636363135432984E-01 -0.435069389993829011E-03 -0.166666666666667011E-01 0.193887084930986983E-01 -0.125641043857266008E-02 -0.435069394131910002E-03 -0.118148287005216007E-02 0.194636361542637991E-01 -0.139670502006070010E-03 -0.166666666666667011E-01 0.172964394138492988E-01 -0.239693181051815999E-07 -0.141638479654202995E-03 -0.121293697133386993E-05 0.178147058357341001E-01 -0.166666666666667011E-01 -0.435069390724191016E-03 0.193887813829540998E-01 -0.125648495565761011E-02 -0.435069393401547020E-03 -0.118155573441382005E-02 0.194637106641438011E-01 -0.139699943939322992E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.172964151706326008E-01 -0.947752965014964063E-08 -0.141286304262072990E-03 0.000000000000000000E+00 -0.166666666666667011E-01 -0.427064219430228011E-06 0.177495997096570012E-01 -0.140178146812241995E-03 -0.166666666666667011E-01 0.177492252668765005E-01 -0.496801880864810970E-07 -0.140415043122611998E-03 -0.517779586424547950E-06 0.178140139541973011E-01 -0.435069391018619994E-03 -0.166666666666667011E-01 0.193887087741046009E-01 -0.125641070402467006E-02 -0.435069393107117989E-03 -0.118148311448062994E-02 0.194636364104064999E-01 -0.139670745278655002E-03 
-0.166666666666667011E-01 0.172964393844560992E-01 -0.239719332530519000E-07 -0.141639884131573011E-03 -0.121306562524490010E-05 0.178147059631618004E-01 0.000000000000000000E+00 -0.166666666666667011E-01 0.193887976612396006E-01 -0.125650164849242007E-02 0.000000000000000000E+00 -0.166666666666667011E-01 -0.118157208094086998E-02 0.194637273068684995E-01 -0.166666666666667011E-01 0.172964393844281007E-01 -0.239719133479159011E-07 -0.121306464654854992E-05 0.178147059621872987E-01 -0.166666666666667011E-01 -0.139670744021023010E-03 0.172964393843868004E-01 -0.239719197464163008E-07 -0.141639876873170012E-03 -0.121306496126475003E-05 0.178147059625002983E-01 -0.435069390942508026E-03 -0.166666666666667011E-01 0.193887087711050003E-01 -0.125641070244318989E-02 -0.435069393183230011E-03 -0.118148311400550999E-02 0.194636364097085998E-01 -0.140348087386128002E-03 0.178144318601935990E-01 -0.933801313750166004E-06 -0.166666666666667011E-01 -0.140348087289432998E-03 -0.933800972552974967E-06 0.178144318366253995E-01 -0.166666666666667011E-01 -0.435069391191810016E-03 0.193887815133764012E-01 -0.125648507876439009E-02 -0.435069392933928997E-03 -0.118155584777763990E-02 0.194637107833691009E-01 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964151566877000E-01 -0.947849244181166918E-08 -0.166666666666667011E-01 -0.427106377285012985E-06 0.177495993062723005E-01 -0.166666666666667011E-01 -0.140178145569364992E-03 0.177492248215362983E-01 -0.496797136596528977E-07 -0.140415043288561988E-03 -0.517779487315898039E-06 0.178140139541107002E-01 -0.166666666666667011E-01 0.193887977549708004E-01 -0.125650168627734998E-02 -0.118157208932236990E-02 0.194637273908664990E-01 -0.435069390319006992E-03 -0.166666666666667011E-01 0.193887087496051012E-01 -0.125641069305206009E-02 -0.435069393806730991E-03 -0.118148311376660995E-02 0.194636364069746999E-01 -0.139670541873138993E-03 -0.166666666666667011E-01 0.000000000000000000E+00 0.172964393705303011E-01 
-0.239412684749743987E-07 -0.141637881918781994E-03 0.000000000000000000E+00 -0.166666666666667011E-01 0.000000000000000000E+00 -0.121130856871430998E-05 0.178146131135742992E-01 0.186264861206935986E-17 0.188469719401502014E-16 0.000000000000000000E+00 0.552421818745575958E-18 0.917870006576351024E-18 0.914518973837806942E-17 -0.462479989200847985E-18 -0.203714864886006990E-16 -0.195088628411609992E-16 0.361312491563162037E-20 0.221189708273807991E-17 -0.223400286947376007E-18 -0.740148597608108019E-18 0.715802397433987990E-18 0.210593204619582009E-19 -0.247092745603879988E-17 0.234522247271034021E-18 0.359989002583078020E-20 0.130125436521942010E-18 0.523865972357081977E-17 0.000000000000000000E+00 0.502798757490152989E-17 -0.774865740148233950E-17 0.305533254404098983E-16 0.000000000000000000E+00 0.162630325872825993E-17 -0.590890184004599994E-17 0.202691596146165005E-15 -0.691124674850884999E-15 -0.677626357803439970E-20 -0.638832248819193008E-17 -0.354921197842210034E-17 0.180212876973253014E-17 0.592118381778119037E-17 0.987885261959830012E-19 -0.448662764248762018E-18 0.394717353420504035E-17 0.000000000000000000E+00 -0.120292707493299994E-16 0.852284551527277001E-17 0.000000000000000000E+00 0.303397988982417995E-17 0.101048383629478005E-18 0.740148804403260964E-18 -0.135433289076572003E-16 0.223533152833267012E-19 -0.447899111107253025E-17 0.212817027997642990E-19 0.000000000000000000E+00 -0.403081803774639981E-18 -0.672555409776245036E-17 0.000000000000000000E+00 0.491279109407494004E-18 -0.205371608391277999E-16 0.173167415736669007E-16 0.462479989200847985E-18 -0.704731412115577978E-18 0.000000000000000000E+00 -0.474338450462407997E-18 -0.612574227454310005E-17 -0.184314369322535985E-17 0.765717784317887969E-17 0.648342840630083007E-17 -0.304990590835039002E-18 -0.592118946587380969E-17 0.339088701131534005E-19 -0.112557736932424994E-16 -0.381335904017217022E-18 0.000000000000000000E+00 -0.669901779474492986E-17 0.138243891116723001E-17 
-0.462599103209056006E-18 0.620975527371875958E-25 0.182632162877623988E-17 0.000000000000000000E+00 0.430176265969362029E-19 -0.395986452044138980E-18 -0.425221003356408028E-17 0.763810107027342937E-17 0.449986253228846961E-21 0.149920187933444999E-16 0.933333695763632040E-17 -0.449986253228846961E-21 -0.102263638350942002E-18 0.716941713442839936E-17 -0.185604230015463005E-18 0.148029860142325988E-16 0.765722441914230953E-17 0.000000000000000000E+00 0.395087089926695984E-18 -0.112484787433542003E-16 0.148029860142325988E-16 0.186249855764197994E-18 -0.123980477187407996E-17 -0.177475529531162003E-18 0.000000000000000000E+00 0.468189851533179025E-17 0.846067343136115030E-19 0.251873187799946985E-17 0.000000000000000000E+00 0.427222242771388022E-18 -0.126919416816583993E-16 0.000000000000000000E+00 -0.639166659846272950E-25 -0.577339967784366969E-17 0.000000000000000000E+00 -0.299518791083001013E-18 0.885930391776569904E-18 -0.415350546620027017E-18 0.777912411780388954E-17 0.148029595444530002E-16 -0.870855748895828018E-19 0.370461440010243996E-17 0.000000000000000000E+00 0.925637604759498948E-17 -0.157717534778751000E-16 0.313749473992465019E-16 0.575982404132923990E-19 0.219550939928314995E-17 -0.590890184004599994E-17 0.200929767615876011E-15 -0.698361724352226037E-15 -0.762329652528870004E-20 0.762499059118321011E-17 0.420595399031252993E-17 -0.953539688950861913E-18 0.740148701005684010E-18 0.731844738233841023E-17 0.151625515003419992E-18 0.117147568281026001E-16 0.000000000000000000E+00 0.912148605074475063E-18 0.159581007262709991E-17 0.449986253228846961E-21 -0.130862557845303000E-24 0.171921983655087999E-17 0.000000000000000000E+00 0.158448927876369990E-18 0.945956303890129087E-18 0.984593083121869034E-18 -0.108044346378205993E-16 -0.148029595444530002E-16 0.326160624252108976E-18 -0.372472316079360021E-17 0.148029595444530002E-16 -0.106167109608854004E-16 0.550232602536393962E-17 0.123056946577105000E-16 0.677626357803439970E-20 0.603087458445062034E-18 
-0.292734586571086005E-17 0.670850094225405965E-18 -0.124971241037898994E-16 0.000000000000000000E+00 0.128342432167972003E-16 0.179743617411750996E-16 -0.477944389246467971E-18 0.592118946587380969E-17 -0.141023146889066998E-19 -0.776697175539328012E-17 -0.238961064843469015E-19 0.000000000000000000E+00 0.212532690638741003E-16 -0.147509485406731005E-17 0.000000000000000000E+00 0.635022970263750003E-17 -0.216767304995182992E-18 0.592118946264263003E-17 0.350644635857248989E-19 -0.606692755904139014E-18 -0.229492316153697017E-18 0.119207909163859998E-16 -0.462599103209056006E-18 0.119335121574955004E-17 -0.283978943926022000E-18 0.000000000000000000E+00 0.526661030130147969E-18 0.834631058858251966E-18 0.572340905439509016E-18 -0.148029860142325988E-16 -0.952426877165385946E-17 0.592118381778119037E-17 -0.152714247567623012E-19 -0.136949449939554005E-17 0.000000000000000000E+00 -0.453732450462848006E-18 -0.649938968340065021E-17 -0.212605269760829000E-18 0.148061359180052007E-17 0.173938215718671006E-16 -0.149755425074559992E-16 -0.176182853028893989E-18 0.000000000000000000E+00 0.314858028362183973E-19 0.222314384918731995E-17 -0.224993126614423998E-21 0.577572400366616003E-25 0.196242353951357996E-16 0.592118381778119037E-17 0.846496107036753039E-18 -0.256018086451873006E-19 0.809438002004071989E-18 -0.177860077526130999E-16 0.000000000000000000E+00 -0.222683638344174993E-18 0.123060118814754002E-16 0.148029595444530002E-16 0.169406589450859996E-18 -0.813151629364127964E-19 -0.921571846612678961E-18 0.000000000000000000E+00 0.831362837730095975E-18 0.000000000000000000E+00 -0.542101086242751976E-19 -0.159835117146885992E-17 0.000000000000000000E+00 0.726754268744189980E-18 -0.753520509877426044E-17 0.777915058758348969E-17 0.000000000000000000E+00 -0.513301966036106005E-18 -0.420128341838133016E-18 0.608169656128588029E-17 0.578099986501059981E-19 -0.201217970576179000E-17 -0.369654608040760013E-19 -0.148029595444530002E-16 0.144305243943295996E-16 
-0.705346343352828992E-19 0.000000000000000000E+00 -0.147612593664461005E-18 0.183565822566964011E-16 0.278007338868100017E-18 -0.662135360669951994E-17 0.462585868319254965E-18 0.382512344841314970E-17 -0.174270411452671991E-18 0.000000000000000000E+00 -0.413352078260099028E-17 0.111698234754425002E-16 0.240451477901814990E-18 0.000000000000000000E+00 -0.117367664498414002E-16 0.000000000000000000E+00 -0.428175154837049014E-18 -0.174319380544935010E-17 0.575982404132923990E-19 -0.271728169479180011E-17 0.735145188877927989E-18 0.530165449030040969E-17 0.000000000000000000E+00 -0.422661168873770002E-18 0.147489611940655008E-17 0.542185789537478032E-17 0.000000000000000000E+00 0.160646151193882004E-16 -0.132814766129474004E-17 0.148027477862161989E-16 0.314846080772212007E-17 -0.793361300897053979E-19 0.000000000000000000E+00 -0.798196203889169946E-19 -0.312772209729571009E-17 0.473491417515153986E-18 0.994130806456849964E-17 -0.462585868319254965E-18 0.140167012111641990E-16 0.389635155736977975E-19 -0.575982404132923990E-19 0.106920968931909993E-16 -0.542101086242751976E-19 -0.921571846612678961E-18 -0.460785923306338999E-18 0.693889390390722992E-17 0.590890184004599994E-17 -0.176182853028893989E-18 -0.708119543904594929E-17 0.000000000000000000E+00 -0.790175860669855044E-18 -0.677626357803440031E-19 -0.899887803162968988E-17 0.000000000000000000E+00 0.282909004382935976E-18 -0.319839640883224001E-17 -0.189735380184963011E-18 -0.921571846612678961E-18 0.940111280245706959E-17 -0.136473551414918999E-17 -0.462585868319254965E-18 0.947011739499720023E-25 -0.774842994750098042E-17 -0.592118381778119037E-17 0.380929522460731986E-18 -0.412849204146454994E-18 -0.790655128793651072E-17 0.574799928767763978E-17 0.449986253228846961E-21 -0.184669492435164995E-16 -0.668551742520389010E-17 0.000000000000000000E+00 0.757247454845344985E-18 -0.758433300971500986E-17 -0.956088439213291989E-19 -0.739967982721356930E-17 0.674366604445490992E-17 0.000000000000000000E+00 
-0.129066645337873990E-18 0.567512074660380996E-17 0.740306795900258034E-17 0.289685267960971005E-18 -0.853809210832335031E-18 -0.346944695195360995E-17 0.000000000000000000E+00 -0.440457132572235996E-19 0.140946282423116000E-17 0.126038502551440006E-17 -0.460785923306338999E-18 -0.334096264176697982E-17 -0.121481465295212001E-16 -0.179994501291539010E-20 0.348945878064791996E-18 0.402866943610530006E-18 0.000000000000000000E+00 0.102462200732453003E-16 0.324395420824920984E-19 -0.824904211507282051E-17 0.260886147754324986E-18 -0.169406589450859993E-20 -0.300220240242445980E-18 -0.155398517387845997E-16 -0.462612338098855988E-18 -0.113502414932076005E-18 0.853279815240300958E-17 0.118584612615602002E-19 0.000000000000000000E+00 -0.420128341838133016E-18 -0.590890184004599994E-17 -0.325260651745650993E-18 -0.414707330975704984E-17 -0.739967982721356930E-17 -0.609863722023095955E-18 -0.775204553327135978E-17 -0.110046520507278993E-16 -0.596311194867026976E-17 0.506525702458071988E-18 0.406575814682063982E-19 -0.585807986321074037E-17 0.000000000000000000E+00 -0.957507219399941999E-17 -0.993410828451683924E-18 0.000000000000000000E+00 -0.258493941422821001E-23 -0.140905965640059988E-16 0.000000000000000000E+00 -0.455999856282107988E-19 0.130766005188302993E-17 0.851098705401120981E-17 0.412593057330019984E-17 0.913207396258542929E-21 -0.199420672541224012E-17 0.107126374421619007E-16 0.169406589450859993E-20 -0.169813165265542009E-16 0.111740586401786993E-16 -0.220546203641338017E-18 -0.580217568869195973E-19 0.745639133001020026E-17 0.000000000000000000E+00 0.633051248954183005E-18 -0.106607566741425994E-16 0.580217568869195973E-19 -0.175505226671090992E-17 -0.127986678330124993E-17 -0.686052019522905026E-18 0.000000000000000000E+00 0.469395219121791008E-18 -0.219520603079348977E-18 0.373882580183111030E-17 0.462599103209056006E-18 0.597412337698458029E-17 -0.221075599233372003E-18 0.000000000000000000E+00 0.234092940533119000E-17 -0.456210270350426032E-18 
0.000000000000000000E+00 0.247588805817228019E-18 0.115207598134018006E-16 0.293496916223615019E-18 -0.516984574123192008E-17 0.462585868319254965E-18 -0.979837125471933953E-17 0.542101086242752024E-18 0.000000000000000000E+00 -0.111998538839297005E-16 -0.886573158126109934E-17 0.107314690359872997E-18 0.000000000000000000E+00 0.132318325413044006E-16 0.000000000000000000E+00 -0.490891988880818995E-18 0.300838917864701994E-17 -0.449986253228846961E-21 0.153224066374812011E-16 0.752300914782277012E-18 -0.202894665677824002E-16 -0.592118381778119037E-17 -0.607071160275117030E-18 0.706332833781481025E-18 0.183276753962148989E-16 0.740306795900258034E-17 -0.673708830422389036E-18 0.996110745971057006E-18 0.000000000000000000E+00 0.562224322594635998E-24 -0.143283150371639000E-17 -0.592118381778119037E-17 0.695175821679365018E-18 -0.377743814046069017E-18 0.851295905259153931E-17 -0.617021150427395005E-17 0.000000000000000000E+00 -0.699522159489963935E-17 -0.230593470233653008E-17 0.899972506457693922E-21 0.136092783635348004E-16 -0.203287907341032009E-18 -0.235813972515596983E-17 0.000000000000000000E+00 0.151788304147971006E-17 0.000000000000000000E+00 0.718283939271646976E-18 0.339279047022710001E-17 0.000000000000000000E+00 0.740095037663445042E-19 -0.151788304147971006E-17 0.498732999343331963E-17 -0.592076030130756014E-17 0.124344436656931001E-17 0.311708124589582987E-17 -0.189735380184963011E-18 -0.460785923306338999E-18 0.145657373796625989E-16 0.661871544984509977E-17 -0.179994501291539010E-20 0.232356033872243014E-17 0.178769240009195011E-19 0.740148597608108019E-18 -0.485518623621675008E-18 0.132951964373823990E-18 -0.604535355389274969E-17 -0.332301613119702991E-18 -0.169406589450859993E-20 0.379629579047537022E-18 -0.785816701559762074E-17 0.000000000000000000E+00 -0.108081404069648993E-17 -0.867086452280546054E-17 -0.128749007982653989E-18 0.000000000000000000E+00 0.406575814682063982E-19 -0.596311194867026976E-17 -0.157209315010398008E-17 
0.542101086242752024E-18 0.739967982721356930E-17 0.348977574268771980E-18 -0.242590236093632002E-17 0.357786716920216001E-17 0.000000000000000000E+00 0.508219768352580023E-19 -0.327971157176865014E-17 0.108420217248549998E-18 0.460785923306338999E-18 0.269695290405768997E-17 0.105729828849834003E-16 0.000000000000000000E+00 0.605520075134427981E-17 0.295792342423450008E-18 0.740148804403260964E-18 0.887040033888795054E-17 -0.196630757771205002E-18 -0.695752862874681990E-17 -0.545383338913363016E-18 0.000000000000000000E+00 0.539586457180591013E-18 -0.226178972740580010E-17 0.000000000000000000E+00 0.101643953670515996E-19 -0.219550939928314995E-17 0.157209315010398008E-17 -0.740306795900258034E-17 0.156701095242045996E-18 0.000000000000000000E+00 0.182959116606928994E-18 0.556924162819702011E-17 0.000000000000000000E+00 0.492126142354748016E-18 0.129542647455343004E-16 -0.267003768769613992E-18 0.000000000000000000E+00 0.511303497676177987E-18 -0.249632582995209015E-17 -0.104854969288279003E-17 0.219719850209398022E-24 -0.877420254237048056E-17 -0.186868703054100004E-17 0.000000000000000000E+00 0.310192729707385011E-23 -0.646582600286570010E-17 0.000000000000000000E+00 -0.831029897533542997E-18 0.106143816202805005E-18 -0.664497347120999001E-18 -0.396502740054638028E-17 0.913207396258542929E-21 0.321382829034003006E-17 0.242124367972641983E-17 -0.190582413132217990E-20 -0.144015989742860999E-18 -0.112475097000963994E-16 0.329502123734093982E-18 0.148029595444530002E-16 -0.150205510589462992E-16 0.000000000000000000E+00 0.901573514533289918E-18 0.229584989725403987E-17 0.000000000000000000E+00 -0.629603722527569038E-18 0.415692006776889032E-17 -0.259039880627106002E-18 0.000000000000000000E+00 0.526221725872965983E-18 0.153314307621524007E-16 -0.121059189334008992E-17 0.219719850209398022E-24 0.518485626953725989E-17 0.124986814033672995E-17 -0.148029595444530002E-16 0.200429726146822988E-17 0.922342050742921017E-19 0.000000000000000000E+00 0.356920501310487004E-18 
0.132035399740042995E-16 -0.379419114896017021E-18 0.180196383238973012E-16 0.000000000000000000E+00 0.117761188624817996E-16 0.427189116772794014E-18 0.000000000000000000E+00 -0.945056279636684015E-18 -0.306948887770498012E-17 -0.627647260344651004E-18 -0.148029860142325988E-16 -0.211552721983768995E-16 0.000000000000000000E+00 -0.363690567209131027E-19 0.235090141659442018E-17 0.000000000000000000E+00 -0.154382013937734992E-18 0.104986094217475005E-17 -0.202012618175241010E-16 0.000000000000000000E+00 -0.201818834573138001E-18 -0.277787102030008014E-18 0.922418879559933069E-18 -0.740306795900258034E-17 -0.161027316020146994E-16 0.159919820441612000E-17 0.000000000000000000E+00 0.155950028687531998E-16 -0.217486462555504989E-20 0.592118919445516966E-17 -0.163450889040478004E-18 0.138365478911949995E-17 -0.688214269644119025E-20 0.137618501731589997E-16 0.462638807878457974E-18 -0.300866102864727018E-17 -0.425819344452497991E-18 0.000000000000000000E+00 -0.534138976538562025E-17 -0.230392961653169981E-18 -0.693889390390722992E-17 0.000000000000000000E+00 -0.124683249835832991E-17 -0.593600689435813986E-17 0.203287907341032009E-18 -0.146240238343454992E-17 -0.462585868319254965E-18 0.804681299891585021E-20 -0.921571846612678961E-18 0.758941520739853008E-18 -0.596311194867026976E-17 0.103338019565025002E-18 -0.121972744404619008E-18 -0.641119237776780004E-17 0.578099986501059981E-19 0.904419429430779033E-17 -0.317770699320046013E-17 0.462599103209056006E-18 0.723480113396294003E-25 0.177058483496122985E-16 -0.592123675734038991E-17 0.353544910182120027E-20 -0.227547188927349024E-18 -0.648947871549377031E-17 0.181164310453569009E-17 -0.449986253228846961E-21 -0.153640401864531989E-16 -0.669340050167796020E-17 -0.449986253228846961E-21 0.501443504774546032E-17 -0.146028480106641009E-16 -0.454644934438745996E-18 0.580217568869195973E-19 0.102203789509092007E-17 0.000000000000000000E+00 -0.394823232538910986E-18 0.152398167869993994E-16 0.000000000000000000E+00 
0.485858098545066983E-17 0.339523416855172998E-18 -0.196963000034115994E-18 0.000000000000000000E+00 0.163869376255981004E-16 -0.375984607678321967E-20 0.602139840335321016E-17 0.000000000000000000E+00 0.374388562686400991E-18 -0.187956610995729014E-17 0.000000000000000000E+00 0.117614743347384008E-23 -0.156049686373756991E-16 0.592118381778119037E-17 -0.111927793918019994E-17 -0.125932623433033007E-17 0.107700239243383999E-17 -0.680955925205089026E-17 0.000000000000000000E+00 -0.474338450462408009E-19 -0.198459819541683001E-16 0.148031713026898014E-16 -0.589365524699542022E-17 -0.100559751498030998E-16 0.119262238973404995E-16 0.000000000000000000E+00 -0.406575814682063982E-19 -0.298155597433513989E-17 0.291379333855478980E-18 -0.143173979074393986E-16 -0.580217568869195973E-19 0.793500464987829020E-17 0.509551198266546019E-17 -0.207274917893538006E-19 0.592118946587380969E-17 -0.249908860027332019E-18 0.726475167665756951E-17 0.143720452004045010E-18 0.219719850209398022E-24 -0.713783873983173068E-17 0.273122128532314001E-17 -0.148029860142325988E-16 0.616308195352442029E-17 -0.103685693916237999E-18 0.000000000000000000E+00 0.172732113104808003E-18 -0.119086404205086997E-17 0.148812751953918994E-18 -0.973629116658004042E-17 0.462585868319254965E-18 -0.128415437610183997E-16 0.364164300122516006E-18 0.581611368201348043E-25 0.965757993893369943E-18 -0.318378260621606992E-17 0.523869464800819009E-18 0.000000000000000000E+00 -0.125934272098304005E-16 -0.592118381778119037E-17 0.170794585972679995E-18 -0.638088177192808014E-18 0.148029860142325988E-16 0.636134305731703948E-19 -0.115664869328127008E-16 0.103770143886900998E-17 -0.740148701005684010E-18 0.109459983278530000E-16 0.860412658285831038E-19 0.453890545720096961E-18 -0.462479989200847985E-18 0.574764794271245981E-18 -0.417079023228017025E-17 -0.359989002583078020E-20 -0.123844851232456994E-24 -0.997743062832839024E-17 -0.592118381778119037E-17 -0.350289482720280986E-18 0.671115955244159048E-18 
-0.462718217217262968E-18 0.366619682373303022E-17 0.148029595444530002E-16 -0.269938812378104984E-18 -0.623165778889683993E-17 -0.148029595444530002E-16 -0.428259858131774020E-17 0.948338087745914968E-17 0.300036010576418020E-16 0.000000000000000000E+00 0.409286320113278001E-17 -0.590890184004599994E-17 0.201607393973679993E-15 -0.699147770927278027E-15 0.762329652528870004E-20 0.813490442543030046E-17 -0.681799066366914999E-17 -0.607060631336584989E-19 -0.592118946264263003E-17 0.138582841111243012E-18 -0.269407654562161003E-17 0.361013453120626983E-18 0.232644547280538988E-24 -0.191654297740858989E-16 -0.185276296712765011E-18 0.000000000000000000E+00 0.658091044686770012E-17 0.383052929495380021E-18 0.000000000000000000E+00 -0.177559921692520007E-18 -0.413370558966372027E-17 -0.446111077151846004E-19 -0.468735550811414966E-17 0.000000000000000000E+00 -0.165755308426161985E-16 0.248649031945594005E-18 -0.219719850209398022E-24 -0.274940472223534988E-18 -0.658549193252238029E-19 0.554334983005379011E-18 0.000000000000000000E+00 0.614026536820119009E-17 -0.592118381778119037E-17 0.286291090607452007E-18 -0.122457478756202001E-16 0.148029860142325988E-16 -0.552535673821147045E-18 0.768758534050744985E-19 0.492023165039092978E-18 0.592118381778119037E-17 0.874500688343341996E-18 0.104618365722086995E-19 0.111289438122280006E-16 -0.462585868319254965E-18 0.884185636357436999E-17 0.155484871948446003E-18 -0.148029595444530002E-16 -0.592115734800158020E-17 -0.959692361744608029E-20 0.592118946587380969E-17 -0.191465157017479006E-18 -0.592118381778119037E-17 0.220854337068372988E-18 0.888178896156158063E-17 0.000000000000000000E+00 -0.806069459784106008E-17 0.127742485722792993E-18 0.219719850209398022E-24 -0.296056543911099002E-17 0.296059190889059018E-17 0.719732306674833017E-19 0.000000000000000000E+00 -0.120712129288665994E-16 0.000000000000000000E+00 0.154282173561696008E-19 0.296059190889059018E-17 0.449986253228846961E-21 -0.296056543911099002E-17 
-0.166744391367418999E-17 0.868868912763262986E-17 0.000000000000000000E+00 -0.440416500555818969E-18 -0.236804406269326013E-18 0.139891281099502011E-16 0.000000000000000000E+00 0.569938098259822986E-17 0.131207118545926006E-16 -0.449986253228846961E-21 0.720490237761527062E-17 0.609070955769230024E-18 0.000000000000000000E+00 -0.856757678897806949E-17 0.148001282823888015E-19 0.148369108142005008E-16 -0.160937199231215010E-18 0.000000000000000000E+00 0.250599984904598978E-18 -0.593031173923045011E-17 0.000000000000000000E+00 -0.747366643183191062E-17 -0.539419458477777017E-18 -0.728712515589093926E-17 0.000000000000000000E+00 -0.489944721671871044E-18 -0.740148688080987035E-18 0.126086815672215012E-18 0.800808902944986061E-17 0.000000000000000000E+00 -0.401836344027630001E-18 -0.161952699515021994E-17 -0.221719344273285996E-16 0.591906623541305000E-17 -0.113502414932076005E-18 0.769783542464707994E-17 -0.243945488809238016E-18 0.000000000000000000E+00 -0.169496586701505991E-16 0.197941011861488998E-18 0.000000000000000000E+00 -0.302670152115197982E-25 0.196007899393148981E-17 0.000000000000000000E+00 0.268184722574811015E-18 -0.507787081806380982E-18 -0.568046702169790025E-17 0.335786576739177007E-17 0.000000000000000000E+00 0.116899459592193998E-16 0.128688313346714001E-16 0.000000000000000000E+00 -0.259192081859816000E-18 -0.276302147394353002E-17 0.527066251428988001E-18 0.740306795900258034E-17 -0.150450521486901014E-16 -0.592118381778119037E-17 0.143254447204384002E-18 -0.742678488152571000E-17 -0.740306795900258034E-17 0.813151629364127964E-19 -0.108420217248549998E-18 -0.135525271560687996E-17 -0.592076030130756014E-17 -0.118245799436700000E-17 -0.325260651745650993E-18 0.962229428080884993E-18 0.000000000000000000E+00 0.177156676220440991E-16 0.103507426154475994E-16 -0.179994501291539010E-20 0.233949856058605991E-17 0.612251172066077035E-18 -0.740148597608108019E-18 -0.667834855456528034E-17 -0.108665062709865995E-18 0.525176309165426997E-17 
-0.475608999883290026E-18 0.190582413132217990E-20 -0.340083728322602019E-18 0.438821300192851001E-17 -0.462585868319254965E-18 -0.132984172718925009E-18 0.124611252035315994E-16 -0.315096256378600015E-18 0.740306795900258034E-17 -0.191090632900570002E-17 0.590890184004599994E-17 -0.216840434497100983E-18 0.314418630020796016E-17 -0.739967982721356930E-17 -0.860585474410368952E-18 -0.244623115167042015E-17 0.140133130793750986E-16 -0.591906623541305000E-17 -0.271050543121376012E-18 -0.395733792957208964E-17 0.256142763249699999E-17 0.000000000000000000E+00 0.588772601636464012E-17 0.292279306361937011E-18 -0.462585868319254965E-18 0.305022850878928992E-23 0.125181543436834992E-17 0.592123675734038991E-17 -0.840797659796876002E-18 -0.119751929896036996E-17 -0.803410750470703968E-18 0.990969653027917929E-17 0.000000000000000000E+00 -0.615012094155625980E-17 -0.804469541654772036E-18 -0.190582413132217990E-20 -0.342201310690737000E-18 0.120498907076396997E-16 0.361683068477586004E-18 0.000000000000000000E+00 0.190463629996254992E-16 0.000000000000000000E+00 -0.736389268519206972E-18 -0.564293349460814963E-17 0.000000000000000000E+00 0.467562186884373980E-18 -0.188380127469356010E-17 0.650521303491303046E-18 0.000000000000000000E+00 0.298155597433514008E-18 0.254787510534094004E-17 0.474338450462407997E-18 0.000000000000000000E+00 0.253944448053778995E-17 -0.988826262624670062E-17 0.179994501291539010E-20 0.443980138421951982E-17 0.118181258669405998E-18 0.740148597608108019E-18 0.269978517047506999E-17 0.191455088678460996E-18 -0.363737123374678000E-17 -0.190370654895404001E-18 0.000000000000000000E+00 0.452156775156186007E-18 0.744150207898425038E-17 0.000000000000000000E+00 -0.131290106824417010E-18 -0.699437456195238967E-17 -0.154159996400282999E-18 -0.740306795900258034E-17 0.120617491689012002E-17 0.000000000000000000E+00 -0.704731412115577978E-18 0.135525271560687996E-17 -0.739967982721356930E-17 -0.304931861011547978E-18 -0.133796794952696997E-17 
0.309348476086989009E-16 0.000000000000000000E+00 -0.444083492377669035E-18 0.396729056670233018E-18 -0.410980386007786987E-17 0.000000000000000000E+00 -0.829202903714597009E-17 0.612574227454310005E-17 0.000000000000000000E+00 0.876900950248488035E-17 -0.586046090260397985E-19 0.000000000000000000E+00 0.543265756545227034E-17 0.562482816536058969E-20 0.418476627590987005E-17 -0.160089227031063005E-18 0.304931861011548002E-19 -0.397258452262267003E-18 0.144321708717923994E-16 0.000000000000000000E+00 0.203287907341032004E-17 -0.487890977618476995E-17 -0.101643953670516005E-18 0.184314369322535985E-17 -0.609863722023096013E-17 0.000000000000000000E+00 0.298968749062877971E-16 -0.674661742488049989E-17 0.000000000000000000E+00 -0.346859991900635989E-18 -0.512412489595360027E-25 -0.229649779221929994E-16 0.592118381778119037E-17 0.968136764274826967E-18 0.248448055279030997E-19 -0.107229265609408996E-16 0.148029595444530002E-16 0.106183896722397993E-16 0.555835236088826021E-17 0.000000000000000000E+00 -0.102162426050442001E-16 0.105082980687594002E-17 0.000000000000000000E+00 0.760580847189921965E-17 0.559763864169073973E-19 -0.733749295744850035E-18 0.839903522791146024E-19 0.219719850209398022E-24 0.117210647846692996E-18 -0.125995396489508994E-16 0.462585868319254965E-18 0.128018813561245007E-16 0.318308925156784023E-18 -0.818753745432010944E-17 0.000000000000000000E+00 -0.817699696977523956E-19 0.740148681618638981E-18 -0.599551467812316950E-19 0.108906402847091006E-16 -0.462599103209056006E-18 -0.494820159361191037E-18 -0.232935714856157988E-18 -0.685910313144217059E-19 0.143819535639848992E-17 -0.400925499014078976E-17 -0.410781759263196998E-17 -0.735211518423296988E-18 -0.686166170447438018E-18 0.184201211014737999E-17 0.565444743514457976E-18 -0.496970112021859027E-17 -0.300019069917472988E-17 0.677626357803439982E-18 -0.639762992444437982E-18 0.112363387228590996E-17 -0.391203242024195008E-17 0.452220302627229964E-17 0.251521139731243992E-17 
-0.321237245246193018E-17 -0.204498708472292984E-17 0.573610877316734989E-18 0.449273430335979002E-18 -0.296543215624495000E-17 -0.273389396303369975E-19 0.644925013057074959E-18 0.106550458191936003E-17 0.459232167923234995E-18 0.257365667067299011E-17 -0.726396926719566976E-18 -0.219646231134880988E-17 0.282697246146122979E-18 -0.721963238636283036E-18 -0.174012786050891999E-17 0.322031338634244021E-18 -0.812092838180059969E-19 -0.176127328530276993E-18 0.192190328165929010E-17 0.695940524306275957E-18 0.238706623117694998E-17 -0.975821659906356921E-18 -0.341196120810363013E-17 -0.296800344717906993E-17 -0.310014058695074020E-18 0.106340354316346995E-17 0.127478458561772008E-17 0.185161402269790016E-17 0.246253653590506004E-17 0.468239813242176996E-17 0.147044919643347001E-17 -0.303020954682541977E-15 0.120558517020060009E-15 0.514724981387492984E-16 -0.395303500220003983E-15 -0.937401198330966936E-15 -0.695346287059999988E-16 0.360751332235607007E-16 -0.841069835305629941E-16 -0.465122731996280971E-16 0.138223865591080994E-17 0.316557388212613009E-17 0.153530511954130000E-17 -0.114158865466198001E-17 0.251918186425269985E-17 -0.826679341101821022E-18 -0.242299109877044003E-17 -0.276451039904612007E-17 -0.219837847523779012E-17 0.189542150793871004E-17 -0.349685640873116977E-17 -0.322931311140702004E-19 0.296090954624581016E-17 0.262580213648832990E-17 -0.138913403349705005E-17 0.200852687617675992E-18 0.140967458246796992E-17 -0.665692623106138030E-18 0.296959494267761993E-18 -0.141501031100923995E-17 0.514096059424157008E-18 0.474857258142600987E-17 0.234265490408897985E-17 0.238700305525767010E-17 0.123478461273650007E-17 -0.777840529785158018E-18 -0.393563436465992003E-18 0.400450542246108993E-19 -0.231993556138459993E-18 -0.105675301103853995E-17 0.725377840204900971E-18 0.508905335644263999E-17 -0.168930133418030005E-17 0.151288025313498993E-17 -0.167222832633720003E-18 0.128336079420867007E-17 0.258071334847867999E-17 0.101915268911433000E-18 
0.208333047333116009E-17 0.371393755278652995E-18 -0.927165655505069004E-18 -0.352746209139563026E-17 0.397578736595447006E-17 -0.321124748682885999E-17 0.311549305911972006E-18 -0.439610099624981972E-18 -0.860585474410368952E-18 0.169745402629762000E-17 0.229715335295365982E-17 0.248350060134961013E-17 -0.577305893113008979E-18 0.141623908780918997E-17 -0.112485975395371008E-17 -0.291379333855478980E-18 0.351052804989545007E-17 -0.548877349820787005E-18 0.332036915323686022E-18 0.230392961653169981E-18 -0.317806761809812986E-17 -0.157886941368202007E-17 0.170761842166467006E-17 0.134170018845081000E-17 -0.340422541501502998E-17 -0.157209315010398008E-17 -0.237169225231204018E-17 0.648827237596793963E-18 -0.152211820621598002E-17 -0.254906293670056003E-17 -0.265743104157052001E-17 -0.576081633090791036E-18 0.295951293498425985E-17 0.491017591086957017E-18 0.570295000582212027E-17 0.425298666891756003E-17 0.322490243264190984E-17 0.170575850282616993E-17 0.316149982129022996E-18 0.316149982129022996E-18 0.170575850282616993E-17 -0.104501833606318004E-18 0.278429414238003988E-19 0.201993100848688014E-17 0.689958412277429955E-18 0.253745521456217996E-17 0.317289218488110982E-17 0.416205158728287997E-19 -0.223087532941556984E-17 0.148681372554603001E-17 0.224561764616170982E-18 -0.161663087507475004E-17 0.252552248152889988E-19 0.181779337333578008E-17 0.362974215988189973E-17 0.105511449044334006E-18 0.811011020335495983E-18 0.212430842451632013E-17 0.291664890900878997E-18 -0.253106873399842011E-17 0.740117494324606029E-18 -0.573664210352185016E-17 -0.213444055721414999E-17 0.191567131261783994E-18 -0.154648946414654008E-17 0.911779475726123009E-19 0.386130223460529005E-18 -0.437674703786760990E-19 0.153868974389064991E-17 0.341944482530724993E-19 0.120185332147335992E-17 0.429533419052512003E-18 -0.521260203663573975E-17 -0.244979246689270001E-17 0.353401443498422013E-17 0.108347743827260008E-17 -0.297276516321888987E-17 -0.502982861212076016E-18 
0.717817057257475013E-18 -0.404118796699791005E-18 -0.824222669082385976E-18 -0.496792572966965000E-18 -0.145300107449145002E-17 0.429727421330391006E-18 0.674705684382426013E-18 0.516237685549988988E-18 0.152970157584615000E-17 0.114320869044779997E-17 0.120185320191991991E-17 -0.132446040544864991E-17 0.212862970992680982E-17 -0.506722571443859031E-18 -0.328233673284015989E-17 0.174357265416989999E-19 -0.250028140764131988E-17 0.304621564884264023E-18 0.287174051018836004E-18 -0.360316566105648980E-18 0.246878009516860991E-17 0.307227700801689012E-17 -0.398859873928170002E-18 0.135186458381786007E-17 0.163646765409530999E-17 -0.134905216973517997E-17 -0.277534977379301996E-17 0.107718664691529003E-17 -0.868460233841873968E-18 -0.314460981668159001E-18 -0.295265097501007996E-17 0.587343884942705016E-18 -0.103889092135869005E-17 0.220358991989202002E-17 0.172080450780937999E-17 0.203458402189975985E-17 -0.188128398314380995E-17 0.435593310570423997E-20 0.150997560841400997E-17 0.338780091677218015E-19 -0.474444329580814977E-18 0.134286485875329006E-17 0.260568510399103997E-18 0.163964402764750996E-17 0.391340595364909024E-18 -0.555124217806786988E-18 -0.134424128729257002E-17 0.120085428439502995E-18 -0.232039433643183985E-17 0.311023860107363000E-17 0.161539357362928995E-17 -0.227030637899263991E-17 -0.123412700414952002E-17 0.460785923306338999E-18 -0.167034897198548008E-17 -0.772738893357237991E-18 0.655603501174828000E-17 0.355923244436257002E-17 0.966041076343529922E-18 0.461463549664142979E-17 -0.813151629364127964E-19 -0.300879655391883993E-15 0.116570897662623010E-15 0.517977587904949990E-16 -0.397920832027020018E-15 -0.938173692378863056E-15 -0.685385179600290032E-16 0.332324906525752010E-16 -0.811796376648520952E-16 -0.500833641052523013E-16 -0.327264414061500016E-17 -0.249197093082215006E-17 0.355081340008800993E-17 0.577464711790618967E-18 0.309632893868808988E-17 0.237164602067062017E-17 -0.236675814580756013E-17 0.129894498034675006E-18 
-0.177843283691745018E-17 0.905876559778580015E-18 0.334760614285670004E-18 0.702903342961835980E-18 0.194244010831745001E-17 -0.311708124589582987E-17 -0.384044738285100005E-17 -0.259964999424184991E-17 -0.230437174456910998E-17 -0.203085578963201996E-17 -0.186536838192342997E-17 -0.292541894847392008E-17 0.444950377659624030E-18 0.473120840600730037E-18 0.144694403214716009E-17 -0.387457071246265019E-18 -0.268733332021511007E-17 0.480630192996640042E-18 -0.968667995173845007E-18 -0.225453915329464011E-17 -0.172670305520930993E-17 0.397513637481238987E-18 -0.223802472500957004E-17 -0.103900998689928001E-17 0.125363275017413008E-17 0.283438399974970010E-18 0.289612476067066011E-17 0.115254714341709003E-17 -0.905774764864446938E-18 0.102320256539338992E-17 0.130652185136016004E-17 -0.227171558456370018E-18 -0.315659132105927017E-17 -0.140885526007124000E-17 0.456135927492873002E-17 -0.136525167485142000E-17 -0.181712389987688988E-17 0.477260714130435988E-17 -0.161968912255027995E-17 -0.847032947254299978E-19 -0.283247817561837995E-17 -0.347622321553165013E-17 0.308735568340312003E-17 0.609016689075842040E-18 -0.308828212568918019E-17 -0.265629532258948987E-16 0.869394617061814022E-17 0.707441917546791951E-17 -0.104625509644850993E-16 -0.644592072860523040E-18 0.182366193543851017E-17 0.220253050832250004E-17 0.181095644122968989E-17 0.465952824284591002E-17 -0.179909797996813007E-17 -0.443167638003450014E-17 -0.110114283143058996E-18 -0.271050543121376012E-18 -0.184653182501437012E-17 -0.338094772539718024E-18 0.535365328665338963E-18 -0.193755392891210995E-18 0.238648297443423004E-17 0.185387667186595989E-18 -0.583221945298563973E-17 0.392947092559352014E-17 0.206476279333813002E-17 0.168586566858662003E-17 0.805104553428405950E-19 0.805104553428405950E-19 0.168586566858662003E-17 0.710517439930086985E-19 -0.850266110884137972E-18 0.661505608924907992E-18 -0.318501041740767019E-17 -0.399340393980533022E-17 0.329464601164237984E-17 0.105759985244622995E-17 
-0.263066876232583997E-17 -0.462818734786794995E-17 0.282065931092569004E-17 0.273173264807079987E-18 -0.191766477016721988E-17 -0.325548795749949012E-18 -0.339916505461695014E-17 0.329796223641775006E-19 0.184871977884447985E-17 0.476420507970624038E-18 -0.416450631384511008E-17 0.953001031626930087E-18 0.133898346897670997E-17 -0.128899552551295991E-17 -0.313369156609463019E-18 0.327541076635822000E-17 0.831022609569495005E-18 -0.292570682126152987E-17 -0.143599266409235012E-18 0.746109032496585016E-18 0.369159449897113977E-18 0.213209126763412006E-18 0.120185446207787991E-17 0.429056143033479976E-18 0.100975456627066995E-17 -0.214856782078001993E-17 0.104065643190709998E-17 -0.985474136165822018E-19 -0.219472349794965002E-17 -0.121077973535169001E-17 -0.306481117342981015E-17 -0.729646695292633014E-18 -0.417870234395862012E-17 -0.982270248064390015E-19 -0.222740183391410013E-17 0.429074284915790001E-18 -0.757393922655926042E-18 -0.222277203054574005E-17 -0.366790309232547969E-17 -0.513331296435003975E-18 0.120185407756813993E-17 -0.768679095514467055E-19 -0.849541118088088029E-18 0.468620978068442048E-18 -0.329866393396347012E-17 -0.130866590350788998E-17 -0.196412382089491007E-17 0.201593841446522999E-18 -0.493820208249257001E-18 0.525838053655470036E-17 -0.339024937138534000E-18 -0.388951895548529013E-17 -0.206676039130048994E-18 0.863973606199385961E-18 -0.153736479926655993E-18 -0.531777872198089969E-17 -0.115969894699322004E-17 0.228376918290164009E-17 0.239277618139722024E-18 -0.675919057019131015E-18 0.872979948708863904E-18 0.700124022566006054E-19 0.160436471636099012E-18 -0.430442219081631023E-17 0.141636781779202008E-17 -0.264584619614596010E-17 -0.214590875462231992E-17 0.136936648480856995E-18 -0.261616426743056012E-17 -0.140853348681295003E-18 0.109188833473735006E-17 0.822558327290080971E-18 0.590700594208202994E-17 0.211440599458354992E-17 0.103192880386794999E-17 0.259586853807156981E-18 0.285297240247498991E-17 0.138834510998783011E-19 
0.738126141010415962E-18 -0.284059369630692984E-17 -0.500243348423792998E-17 -0.177634224131680013E-17 -0.282456371151746988E-17 -0.224463731022390023E-18 -0.116551733542191996E-17 0.295162620673421996E-17 0.304931861011547978E-18 0.155854062294790993E-17 0.683820286230236957E-18 0.203340846900235013E-17 -0.972340883888733010E-18 0.813151629364127964E-19 0.219550939928314995E-17 0.636214387616585012E-18 -0.135525271560688006E-18 -0.958841296291867984E-18 -0.337270652495431019E-17 0.881761298091727038E-18 -0.346436475427008983E-18 0.455327854752469024E-17 -0.188188717875612010E-17 0.140212904092025990E-18 0.250969594776295999E-17 -0.298155597433514008E-18 -0.317806761809812986E-17 0.271050543121375993E-17 0.335425047112703011E-17 -0.105879118406788005E-17 0.387941089842470018E-17 -0.111130722679763993E-17 0.620875150337401956E-18 0.674555863369643047E-18 0.227798923252203012E-17 0.182620303428026999E-17 -0.209725357740164995E-17 -0.209725357740164995E-17 0.182620303428026999E-17 -0.414781446358590017E-19 0.219658225253763007E-17 -0.306111089692803988E-17 0.206912281912994004E-17 -0.255938945946767010E-17 0.304542693212857005E-17 -0.388794043270335027E-17 -0.294961296034002983E-17 -0.640841332232825959E-18 -0.176859620984863995E-17 -0.351940186256116005E-18 -0.669305072706347014E-18 0.115719589845962996E-17 0.249115036765450015E-17 -0.267263072927983979E-18 -0.327855516196318992E-17 -0.658166933893129994E-20 0.288553457408329992E-17 -0.288969575785353004E-17 0.633241671747305985E-19 0.180772237182975992E-17 0.283017902710578996E-17 -0.565930133097904035E-17 -0.921226912297244036E-19 0.248537333825643019E-17 0.337764313885002993E-18 0.278072479341339001E-18 0.197859530774522996E-17 0.566495635123675990E-17 0.179570984817912009E-18 -0.119601052152306996E-17 -0.127224348677596007E-17 -0.247841840366608010E-17 -0.225030184305866019E-17 0.475991984506901981E-18 0.205724781425613012E-18 -0.942906488971646074E-18 -0.319654352426012007E-17 -0.119902807639766995E-17 
-0.333492753201779013E-18 0.201170324972895992E-20 -0.324868899007545985E-17 0.537018888559226030E-18 0.262834323533009010E-17 0.974087889342445029E-19 -0.954182615081968947E-18 0.422838847269347037E-17 0.232256434137128999E-17 0.579741112836364996E-18 -0.817757371014823969E-18 0.861889628043636043E-19 0.258000547574478990E-17 -0.228948704303652010E-18 -0.986816337813258049E-18 -0.235678976639628984E-17 -0.976768781707729945E-18 -0.542140790912154955E-18 0.304632752502048986E-17 0.286085377935140017E-17 0.137515798986736007E-17 0.341100167859307016E-17 -0.366794382918673990E-17 0.388735183230519980E-18 -0.792068449911377045E-18 0.321449003483007017E-17 -0.494074318133433003E-17 -0.477853637193513999E-17 -0.292649883276361008E-18 -0.408307537973959009E-17 -0.194192967971836990E-17 -0.100750598608959000E-18 0.430052854827544002E-18 0.212506008087322998E-18 -0.402770783864319995E-18 -0.447233396150270964E-18 -0.198459819541682993E-17 -0.236970701884191007E-19 -0.220994204661097016E-17 0.296419179891642018E-17 -0.212986434587094013E-17 -0.359565486109450012E-18 -0.322052514457926014E-17 -0.491003823699637011E-17 0.156785798536770993E-17 0.182249726513603012E-17 0.101220437196889006E-17 0.126123205846164993E-17 -0.939359538505019076E-18 0.159242194083808006E-18 -0.338384368472172993E-17 -0.302051948990884002E-17 -0.450578514547435023E-18 -0.156574040299956994E-17 0.863973606199385985E-19 0.408269880576573004E-18 -0.350142244571246017E-18 0.166018457661843011E-18 0.184314369322535985E-17 -0.124683249835832991E-17 0.245300741524844993E-17 -0.271050543121375988E-19 -0.525838053655470036E-17 0.500596471827291982E-18 -0.289852854753073998E-17 -0.358234790318279013E-17 -0.745680161159402964E-18 0.395130447418413017E-17 -0.782221691899545996E-18 -0.116890546721092989E-18 0.241870258088465000E-17 0.352365706057788989E-18 0.645439105807776955E-18 0.299849663328021983E-18 0.216671027907649988E-17 -0.233781093442186990E-18 -0.794834541879753984E-18 -0.102999206386122998E-17 
-0.467562186884373980E-18 0.241234983378025006E-17 -0.226581313390524994E-17 0.247037159066717002E-17 0.853809210832335031E-18 0.853809210832335031E-18 0.250594697445184986E-17 0.696049324406221015E-18 0.310955224796036990E-17 0.556959317995735977E-19 -0.495739267270380026E-18 -0.152187997819956003E-18 -0.313020110589274015E-17 0.360892352575412997E-19 0.274798235917439008E-17 0.219805416308431992E-17 0.128987869551562998E-17 -0.181271491937153990E-17 0.479946121031822013E-17 0.521859986993336979E-17 -0.162858689760435992E-17 -0.410148944122608997E-18 -0.129790139506878007E-17 0.180246481185638019E-17 0.252833916722396005E-17 0.184847156355081005E-17 0.234299073457090997E-17 0.381342421941949979E-17 -0.210785886003519001E-17 -0.186922460423510001E-18 0.283411363932078009E-17 0.245599891393374988E-17 0.115727546289480009E-17 -0.267567233540009983E-17 -0.230406371026381001E-18 0.847032947254299978E-19 -0.199476259078387999E-17 -0.149691897603515996E-17 0.169237182861408996E-17 0.202949094162130015E-17 0.258247110728035017E-17 -0.483304261121869975E-18 -0.267057163278154011E-17 -0.105757357420620006E-17 0.236854234853944018E-17 0.219443737320928007E-17 -0.769317674343717994E-18 0.107361426064483001E-18 -0.208888912704750988E-17 -0.317531476101955994E-17 -0.239075049362525982E-18 0.460785923306338999E-18 0.297573262282276003E-18 -0.931736241979730072E-20 0.330131091192363981E-17 -0.233781093442186990E-18 0.243924312985556986E-17 0.508219768352580011E-18 0.596311194867026957E-18 -0.164154985177883001E-17 -0.290601122335189004E-17 0.116551733542191996E-17 0.240557357020220988E-17 -0.636968776335234048E-18 -0.350774872303727035E-17 -0.278843246236116018E-17 -0.640356908124250960E-18 0.105032085459533001E-18 -0.916951328718438058E-18 0.397946666531910978E-18 -0.123324688397775994E-17 0.324032453972133018E-17 -0.997275416273532054E-18 0.182323841896488004E-18 -0.127499634385454002E-17 -0.769560981766082995E-18 -0.242439037817415004E-17 0.214918066753528012E-19 
-0.279227223476463018E-17 -0.537361858320706021E-18 -0.899122578378295995E-19 0.277170356165288005E-17 0.464597571568984025E-18 -0.136671413017441992E-17 0.274447939333253981E-17 -0.310972264716655005E-17 -0.120358087848916004E-18 -0.688214269644119025E-21 0.293847640803337985E-18 -0.641415699308319028E-18 -0.881761298091727038E-18 0.296628622022741023E-18 -0.105601516593214997E-17 -0.284306608745905986E-17 -0.338283783309686005E-18 0.266548033611127006E-17 0.116345931005788996E-17 -0.166613820907728004E-18 0.215419523463735996E-17 0.626804380968182010E-19 0.639509875176997045E-18 -0.297890899637496982E-17 0.119222803137702003E-17 0.431986803099692980E-18 0.235475159336694994E-17 0.261563774112127983E-17 0.307684718090124983E-17 0.745388993583784009E-18 0.363483013490502018E-17 0.542101086242752024E-18 0.819927892942163017E-18 -0.137908119795569001E-17 0.153143556863578002E-17 0.243945488809237997E-17 -0.169967748778416003E-17 0.284603070277445010E-18 0.346055310600744018E-17 -0.934065582584680084E-18 -0.214364186615361989E-17 0.121972744404618998E-17 -0.187702501111552994E-17 0.129087821161555006E-17 -0.375923809903299015E-17 0.745388993583784009E-18 0.496530713680471013E-17 -0.156912853478858993E-18 0.322888959493338981E-17 -0.216332214728747998E-17 -0.179570984817912009E-18 -0.179570984817912009E-18 -0.216332214728747998E-17 0.159764972230942001E-18 -0.124635604232549994E-17 0.583605700658213030E-18 0.123751513593852991E-17 -0.106874382119810999E-17 0.503334853245148010E-18 -0.570046969647549980E-18 0.186124902247292013E-17 -0.224104734636542000E-17 0.346772455512312988E-17 0.205875328297097990E-17 0.145346221537408007E-17 0.586146799499976027E-18 0.264697796016969010E-17 -0.372201497146810978E-18 -0.208443972253258984E-17 0.831015421872823993E-18 -0.985304458448664060E-18 -0.122467729283171000E-17 -0.128923708528025005E-17 -0.334881093141887986E-17 -0.263824293290113013E-17 0.237010406553594010E-18 -0.229704747383526007E-18 0.129257227751006001E-17 
0.914372066561017023E-18 -0.293793377755153996E-17 -0.146488971553649996E-17 0.586824425857778967E-17 -0.383028298748394999E-17 0.325260651745650984E-17 0.116043513773838992E-17 -0.711507675693612044E-19 0.270785845325358986E-17 0.158620154263168996E-18 0.269861719145015013E-17 0.383335348191774008E-18 -0.514254878101766997E-18 0.233966381899399013E-18 -0.790076598996348969E-18 0.667991358028422990E-18 -0.269652938758407014E-17 0.390482188684231994E-17 0.140692172538938997E-17 0.380910716380259017E-17 -0.247757137071882994E-18 -0.199222149194211016E-17 -0.181773270480773007E-17 -0.490537955578646973E-18 0.115196480826585000E-17 -0.847198383376810945E-19 -0.315405916610788012E-17 0.144814344403536006E-17 -0.268858018189743015E-17 -0.495129841954081968E-18 -0.158074907822647006E-17 0.403686653748175000E-18 -0.171910438467481003E-17 0.160230685720048001E-18 -0.580566047782802009E-18 -0.580566047782802009E-18 0.160230685720048001E-18 -0.945765225168629963E-19 0.838986134255384004E-18 0.121803337815168004E-17 0.381164826264434972E-19 -0.338474365722817992E-17 0.347055868269689008E-17 -0.280497979692496991E-18 -0.317614840398065008E-17 -0.181985442307893013E-17 0.225148543511765008E-17 0.429147919237410989E-18 0.940467960525839941E-18 -0.454856692675558993E-18 0.142979161496525994E-17 0.279498538717379988E-18 0.440419909444671001E-18 0.234553680134310998E-17 0.186512684518457011E-19 0.991875581234786082E-18 0.226687192508932013E-17 0.186688708552808003E-17 0.291392237873034990E-17 -0.296599174392933981E-17 -0.949735692108884063E-18 -0.128833711277379005E-17 0.571747239396652970E-19 0.453162626781050970E-18 0.663129190387836005E-18 0.217093571646361010E-17 -0.296568412519281004E-17 0.396069170862700993E-18 -0.465832944811199039E-18 0.173788617681510005E-17 0.292773924322348985E-17 0.747797743527539004E-18 0.310340345257573986E-17 -0.667401578251672007E-18 0.168471584229874004E-17 -0.578829807955515980E-17 0.479071164337129953E-18 -0.370114100408684010E-18 
-0.957904246464584045E-18 0.356807490403558990E-18 -0.988544004688977926E-18 0.260777962546842991E-17 -0.169343680949112000E-18 -0.331726748645320978E-18 0.152314043111593995E-17 0.754985966476637967E-18 -0.297055098494334995E-17 -0.629786780666742000E-18 -0.471802637821747000E-18 0.474004104058814025E-17 -0.183018673611032999E-17 0.454297022273105981E-17 0.143070440877121008E-17 0.922260060882323081E-18 0.793934569373296001E-18 -0.330004036250274988E-17 -0.365155903561329030E-17 -0.558321767182671991E-17 0.340233613449596002E-17 -0.638672768397092996E-18 -0.904571630663488944E-18 0.737624249173749028E-18 0.273475174932892005E-17 -0.610287238496723009E-18 -0.285280696635248008E-17 0.452885521275844996E-19 0.143734418754817006E-18 0.210147054416443989E-17 0.147160394056858999E-17 -0.253326999085539019E-17 0.491174926009342989E-17 -0.494642756650379982E-17 0.321644052671446983E-17 0.534345440819455030E-18 0.892521263499816052E-18 -0.104905030517445001E-17 -0.843221298991655988E-18 0.269229422284779016E-17 -0.344928028861956983E-17 0.700708005616119955E-18 -0.147680194353786994E-17 -0.533011198491406996E-18 -0.702080298252345043E-18 -0.236921071047437994E-17 0.328175636224288007E-18 0.405514376520035964E-17 0.228538753592071018E-17 -0.130887766174470991E-17 0.848968549887674039E-18 0.757247454845344985E-18 0.107234371122393999E-17 0.672544160119913988E-18 -0.233955793987558016E-17 -0.857197342621352039E-18 -0.433680868994201966E-18 -0.257498015965307014E-18 -0.151110677790167007E-17 0.113841228110978004E-17 -0.813151629364128037E-18 0.508219768352580023E-19 0.102167724434385003E-17 0.247117416265649998E-17 -0.302836777956074006E-17 0.121851355649726996E-17 -0.366020141865323971E-17 0.252415818281781983E-18 0.178808655165382994E-17 -0.542101086242752024E-18 -0.133068876013651007E-17 -0.149077798716756994E-17 -0.214129929065887010E-17 0.226581313390524994E-17 -0.329051124184614016E-17 0.433680868994201966E-18 0.429276297668479027E-17 0.542101086242751976E-19 
0.131970380160180008E-17 -0.479420648145933992E-18 0.660685698858354024E-19 -0.711507675693611996E-18 -0.669083120031099994E-18 -0.599011112386401003E-18 -0.343234624711939003E-17 -0.195054689191077018E-17 -0.204929033676336999E-18 -0.454904338278842028E-17 0.175727572819745014E-17 0.357017653500514010E-18 -0.114005842222755001E-17 -0.367261574528644008E-18 -0.318721147938444995E-17 -0.278055444590598997E-18 -0.108154216643068992E-17 0.286835796186848008E-17 0.144473814824863007E-17 0.175732205031176005E-17 -0.276777279938202999E-17 0.315270956923971003E-17 0.331322231274440013E-18 -0.406575814682063982E-19 -0.190650738250813994E-17 -0.344001255703653014E-18 0.401387737880132007E-18 -0.550238940807836995E-18 0.166319913296329992E-18 -0.231558231661587994E-17 0.179282091988977989E-17 -0.379113418345304005E-19 -0.283504574423973992E-17 -0.850505037875492034E-19 0.215451811295723997E-17 -0.600969876076925994E-18 0.965194043396275044E-18 0.422912962652231018E-17 0.270344316993893994E-17 -0.545658624621219970E-17 -0.227004829864152009E-18 0.253432257818487008E-17 0.476667791067357997E-18 -0.109775469964156997E-17 0.393478567735145006E-17 -0.542101086242752024E-18 -0.286974762529756993E-17 -0.131324333068139999E-17 -0.230392961653169981E-18 0.265629532258949010E-17 0.313508069602497990E-17 -0.453162626781050970E-18 -0.273549290315775986E-17 0.514307817660970006E-18 -0.307181792277692011E-17 -0.460785923306338999E-18 -0.569206140554890020E-18 -0.328648783534668994E-17 0.174804770128381011E-17 0.894466792300540965E-18 0.303576608295941010E-17 -0.894466792300540965E-18 0.361767771772311983E-17 -0.211059806315422997E-17 -0.100288700954909006E-17 -0.830092288309213971E-19 -0.346436475427008993E-17 0.565288613173838978E-18 0.395511446808555009E-18 0.201424434857073006E-17 -0.695414049695780964E-18 -0.493777856601893997E-17 -0.332585501505931016E-17 0.490182939999496966E-18 0.203239589653500981E-17 -0.274086295969446009E-18 0.384873697335969983E-18 -0.622858729446303948E-19 
-0.186535018394995997E-18 0.517960647246005043E-18 0.502290537721800005E-18 -0.168068542091994001E-17 0.219370283682532990E-17 -0.834135547143374013E-18 0.702852057763857018E-18 0.742186150251978975E-18 0.299452616633997011E-18 -0.852618070750257997E-18 -0.208606347807502990E-17 -0.405305265261183001E-18 -0.237848175077988005E-17 -0.266900081679829998E-17 0.347918783084704018E-17 -0.211758236813574988E-18 -0.154132203131700996E-17 0.376929661528163988E-18 0.262664916943558997E-17 0.316693613899383005E-17 -0.806375365786094019E-18 -0.826704156520197035E-18 -0.734112867473461040E-18 -0.262071993880480987E-17 -0.917283742317204066E-18 -0.243945488809238016E-18 0.172117094882074002E-17 -0.476614851508154026E-18 -0.271050543121375988E-19 0.149924831664010991E-18 -0.293611728892637004E-17 -0.138701645112892008E-17 0.199306852488936985E-17 -0.197300443195128015E-17 -0.137409919868329007E-17 0.401330662417864981E-18 0.251442911077089996E-17 -0.623416249179164953E-18 -0.179232171639009990E-17 -0.450060497858702980E-17 0.117250396136948002E-17 0.139756465830018993E-17 -0.331705050461928996E-17 -0.113723582348357996E-18 -0.464527261216917018E-18 0.124683311874379009E-17 0.389591589814325992E-17 0.189226867995338999E-17 -0.708799730321770986E-18 -0.708799730321770986E-18 0.189226867995338999E-17 0.730089460974003017E-18 -0.241616148204289018E-17 -0.642898006966013957E-18 0.193208215268705984E-17 0.114116513818835998E-17 0.940812894841274938E-18 -0.213640693092592999E-18 0.191601065377061008E-17 0.197291178772267999E-17 0.162956090277563995E-17 0.467328921951633987E-17 -0.169970726628621002E-17 -0.373541529739146016E-18 0.309167025747820009E-18 -0.140413908980875996E-19 -0.890122439723413042E-18 0.444510317573746012E-18 0.159521450258606000E-17 0.588714368121340047E-18 0.247807429653125996E-17 -0.150405258163781997E-17 0.211586183246164007E-18 0.825751244454536043E-18 0.100479283368041005E-18 -0.744541960636529998E-18 -0.301924894048795009E-17 0.654121193517132955E-18 
-0.872008011489114096E-18 0.347595984938333010E-18 0.120877279715728994E-17 0.108739371704337996E-17 0.222961127874442003E-17 -0.224159359253126005E-18 -0.703638131074452959E-19 0.364852824584889013E-18 -0.251407909045142986E-17 -0.133273603215256995E-17 0.305124807351774998E-17 0.349674804807092995E-17 -0.668332363235947976E-18 -0.168391328646527988E-18 0.109031825667353006E-17 -0.648693735224327031E-18 -0.364916181449932026E-17 -0.650813065602986984E-18 0.385502589719925017E-17 -0.120598315164507008E-17 0.122143819895578996E-17 -0.100551719832776992E-17 -0.315327560109728015E-17 -0.315810940427846024E-18 -0.218116670784693986E-17 0.142069090570837004E-18 -0.386443752054550038E-17 0.128694000471918996E-18 0.282494506116690973E-19 -0.308648228407384022E-18 0.201310440079036986E-17 0.188112236626928001E-17 0.514747278364022024E-18 0.514747278364022024E-18 0.188112236626928001E-17 0.364092852397105992E-19 0.183392682345548997E-17 0.100813833416394001E-17 -0.180547891482710983E-17 -0.148124239527669002E-17 -0.121626326668438009E-17 -0.819908795219576046E-18 0.141548838806682992E-17 -0.102046261316098992E-17 -0.176117589589950993E-17 0.674229731042702993E-18 0.876369010356629040E-20 0.392250486364678998E-18 0.305615136027426994E-17 0.253348435073654018E-18 0.509936297130785021E-18 0.346726570183350994E-17 -0.498687128533197966E-17 -0.382091095733158007E-17 -0.130344191058839003E-18 -0.132234278800973008E-17 -0.290958106810184998E-17 -0.547838083491907977E-17 -0.148880064826396994E-17 -0.257873617436497015E-17 -0.166142330785849998E-18 -0.315692570397513015E-18 -0.292263116495110999E-17 -0.485469728861570009E-18 0.120185407756813993E-17 0.429089928317842021E-18 0.125501397022973005E-17 0.805596610804541017E-19 -0.244340208613324994E-17 0.286470517184008993E-18 -0.183891871488211999E-17 0.127540845256485994E-17 0.319791558987090013E-18 0.369842425732445988E-17 -0.145110647130905996E-17 -0.142820325539914005E-17 0.130250556227022007E-17 0.429164057413021993E-18 
0.923727205858856957E-18 -0.206702571605096997E-17 0.375403241563379036E-17 -0.302615707496255018E-20 0.120185410341754004E-17 0.833068366285379940E-19 0.230376963355947018E-17 -0.555518782958974975E-18 -0.887232412834819968E-18 -0.103930280883612994E-18 -0.255683181701366009E-18 0.252700699284744995E-17 0.441434446465967964E-18 0.708476885929217991E-18 -0.800512309604317966E-18 0.294386300818232006E-17 0.223320236543596015E-17 0.855079760253216000E-18 -0.338161691451272986E-17 -0.735145188877927989E-18 -0.866514705041148958E-18 0.259976331798577006E-17 0.576172986546055986E-17 0.350862222576413020E-17 0.354864453252188973E-17 -0.407577010831044005E-17 0.449193684955051025E-18 -0.545627853502432998E-17 -0.652112798989854975E-18 -0.316399912411029001E-18 -0.733328286662454975E-18 -0.102626644238229001E-18 0.179973821776224996E-18 0.426600202950748001E-18 0.133225047713300999E-17 -0.149374260248296000E-17 -0.580937546874361965E-17 -0.151407139321705993E-18 -0.290677388387667010E-17 0.242759642683082015E-17 -0.291633443739655992E-17 -0.190359074366827988E-19 0.564585509653169000E-18 0.270940528099907018E-17 0.271364871754146018E-18 0.104165200177577996E-17 0.286929763904433993E-17 0.548919701468149027E-17 0.101931481651438992E-17 0.179570984817912019E-17 -0.677626357803439970E-20 -0.691178884959508980E-18 -0.161023610251002992E-17 0.125360876193635993E-18 0.285280696635248008E-17 0.387602276663567989E-17 0.254787510534094004E-17 0.243945488809238016E-18 0.224971950790742016E-17 -0.589534931288993036E-18 -0.846030404351885973E-18 0.133284687435465996E-17 0.132183461885973996E-17 0.130259894730511999E-17 -0.676196989704949025E-18 0.109267250195805009E-18 -0.239032697715164018E-17 0.211419423634673018E-17 -0.355330321373178992E-17 0.124683249835832991E-17 -0.117229359899994994E-17 0.204812566646090015E-17 0.154366460681175994E-17 -0.102999206386122998E-17 0.473067901041526970E-17 -0.445751088492576035E-18 0.175706396996064004E-17 0.234119906621088999E-17 
-0.149416611895659004E-17 -0.149416611895659004E-17 0.234119906621088999E-17 -0.545767150717587043E-18 0.293270268735776000E-17 0.335791322687940991E-17 -0.198208356635465987E-17 -0.400158216617632984E-17 0.149475408927575000E-17 -0.266451620510562989E-18 0.215783878432394017E-17 0.296575751287548994E-17 -0.132635612379727001E-17 0.125805661614397001E-17 0.329964597375880009E-17 -0.725804251810673020E-18 -0.202273142845066994E-17 -0.361750742191450980E-18 0.238477798464633989E-17 -0.615240396004691044E-19 -0.294892886904875015E-18 -0.136428694961263993E-17 -0.460427427752502973E-18 0.408633405776274967E-17 0.241370248087692008E-17 0.212034179096043994E-18 0.920172551053097029E-18 -0.155108110818391000E-17 0.871671969365264044E-18 -0.135194748282488005E-18 0.579682825683748995E-18 0.307642366442761999E-17 -0.397258452262267013E-17 0.241912609735827984E-17 0.178893358460108000E-17 -0.498055372985528985E-18 0.296678583731738995E-17 -0.628909555627129947E-19 -0.257821443584816018E-18 -0.909587653898011082E-18 0.284076321663370992E-17 -0.257331256353816998E-17 0.642289202035175025E-18 0.575876525014516962E-18 -0.132401837567687995E-17 0.105540305227885996E-17 -0.410048649765807025E-17 -0.330766365902804013E-17 0.314333926726071010E-17 0.249366499671665981E-17 -0.134678238613434004E-17 -0.326266503370516004E-18 -0.803834266944331022E-18 0.584275019020739044E-18 -0.521181830722954026E-18 -0.246360856197893001E-18 -0.890764331878753999E-18 0.548568871229028054E-19 0.885333063976731012E-18 -0.171898057455870004E-18 0.173926304317850004E-17 -0.285280696635248008E-17 0.132475952950572997E-17 -0.372813610800100016E-17 0.343791462020794002E-17 -0.608794342927187998E-17 0.192763522971397014E-17 -0.638662842229741974E-18 0.836445035413621970E-18 -0.484502845829459987E-18 -0.647503748616709023E-18 -0.270095252911453979E-18 0.150473208466164007E-17 -0.401671791702481975E-17 0.289321267132416987E-17 0.217652064114137995E-17 -0.225518220867271997E-17 -0.839409650729012021E-18 
-0.231409401189875007E-17 -0.246802570644996992E-18 0.105003309913973996E-17 0.491384988525900984E-18 0.217168659764162005E-17 -0.437069000783218975E-18 0.285079526310274996E-17 0.445195223120940015E-18 0.207681890754913995E-17 0.875990886138556925E-18 -0.940788906603511093E-18 -0.431139770152439017E-18 0.545235108147592974E-17 0.266137752027301012E-17 -0.111469866730911004E-17 0.519358251608974022E-17 -0.341298856642441984E-17 0.329834629660825014E-17 0.162630325872826002E-18 0.349655200626575026E-17 -0.140792757701426004E-17 0.634427677493470954E-18 -0.722519104007917997E-18 -0.278911008871896016E-16 0.584113920426565976E-17 0.682708555486965996E-17 -0.992044987824237019E-17 0.376929661528164012E-19 -0.102173349262550002E-18 -0.187074505590503005E-17 0.622484512937185011E-17 0.489839153397161963E-17 -0.178723951870656986E-17 -0.215485181781494006E-17 -0.132137139771670997E-18 0.650521303491303046E-18 0.110114283143059006E-17 -0.937384231202242061E-18 0.769355052567648023E-18 -0.277877270876202001E-17 -0.341527911112772982E-17 0.188585358734149006E-17 -0.307333598216620014E-17 -0.275261078554908998E-17 0.577606530023473009E-18 0.170577906601920992E-17 -0.163191393435032013E-19 -0.163191393435032013E-19 0.170577906601920992E-17 0.448670475468240001E-18 -0.291729493222043008E-17 0.269717696660611989E-17 -0.178990335531848991E-18 0.256607323391346977E-18 0.134101628714247995E-17 0.230979308011910995E-17 -0.403170793523692000E-17 0.147556225905089993E-18 -0.337953223511012990E-17 0.117050351108804007E-18 -0.178627074576569006E-18 0.405489664315588988E-18 -0.296848046543621999E-17 0.780119362087592001E-20 0.311241529209519983E-17 0.188868776166629986E-17 0.299491679632580009E-17 0.416857321052395034E-17 -0.175461764953809003E-17 0.476829251835777018E-17 -0.883650515905821941E-18 -0.237576416358402012E-17 -0.452084617380231971E-18 -0.281131514415595014E-17 -0.148642275037992002E-17 0.311120674489479000E-18 0.238552529446801001E-17 -0.793446617904934958E-19 
0.120185555098361008E-17 0.429438776165916018E-18 -0.198933798085102997E-17 0.251437329702588004E-17 0.193038858216918014E-17 -0.381279230735173007E-18 -0.241638795747386001E-17 0.338070251827785015E-17 -0.883954289874912006E-18 0.230546801479983986E-17 0.280509763384942007E-17 0.255304094930993989E-19 -0.243909807437984001E-19 0.430172629206993001E-18 0.270270744496287993E-17 0.151309846511602999E-17 -0.110864972489187992E-17 -0.322256764930377990E-20 0.120185576747228005E-17 -0.704689046369850962E-19 -0.210033616755281988E-17 0.380112200160870019E-18 -0.169167342968315994E-17 -0.103844874485367003E-18 0.251310187995528003E-17 -0.687636018697156054E-19 0.690260094094115994E-18 0.443239768152865043E-18 0.344151637139014981E-17 0.106920187246231007E-17 -0.219024191314240976E-18 -0.110791909500862003E-17 0.171270061934819991E-17 -0.592029708016452967E-18 -0.107287972426087994E-17 -0.206796869538028005E-17 0.276609196837732003E-20 -0.759576795450294022E-18 0.875726188342539956E-18 0.519314816225762026E-18 -0.109032911543856009E-17 0.142830372383842998E-17 0.100573850791572003E-17 0.604142927981981971E-17 0.479671630253418989E-17 0.110844683623943006E-18 -0.235422778124405997E-17 -0.161887517682752998E-18 0.235435785539538009E-17 -0.177241644212961999E-17 0.210064170919065983E-17 0.814342769446204974E-18 -0.282994638255850981E-17 0.217052192733914010E-19 0.234776357155211005E-17 -0.308228175752572013E-20 -0.145160674586253994E-17 -0.227329022625726990E-17 -0.495544052645817993E-18 0.544329179940724975E-17 0.237253928525929987E-17 -0.201932654625425008E-17 -0.155176435936987995E-17 0.160208341039270009E-19 0.892772726406032943E-18 -0.179570984817912019E-17 0.110643678735093001E-17 0.243945488809238016E-18 -0.147722546001149998E-17 -0.300791563965369003E-15 0.123197871683703990E-15 0.519874941706799006E-16 -0.398570506297564017E-15 -0.938756451046574014E-15 -0.674170463378642972E-16 0.300205417165869003E-16 -0.819385791855919976E-16 -0.474473975733969001E-16 
-0.327433820650950991E-17 -0.162460919283374998E-17 0.252287936159080996E-17 0.695202291458967003E-18 0.296090954624581016E-17 0.361391397802216010E-17 0.408203287321918007E-18 0.495238006025435965E-17 0.275835450342189021E-18 0.364880639030068002E-19 0.194083312525632001E-17 0.278119002058286018E-18 -0.641258404061236032E-18 0.165955267525392004E-17 0.475587792606270030E-18 0.475587792606270030E-18 0.165955267525392004E-17 0.499313877686834030E-18 0.118594492762701009E-17 -0.532072040726810014E-18 0.597479878085993991E-17 0.616095653587975001E-17 0.267973686332527998E-17 0.237255357212338008E-17 0.415645509636498002E-18 0.406581717756872996E-17 -0.216855405062163982E-17 0.128431581959682994E-18 -0.333001210673627003E-17 -0.529555996006423025E-18 0.158490256251231992E-17 0.779339004809818954E-19 -0.184932006891523006E-17 -0.121805361876716993E-17 0.305020810095135983E-17 0.451000953589631032E-17 -0.143328195155579993E-17 0.261449190125046997E-17 0.213311232586893987E-17 0.308627464263071995E-18 -0.142468127993853007E-17 0.138447030256762007E-17 -0.416491634298082012E-18 -0.634896151970501991E-18 0.766861496138588998E-18 -0.678597100070076063E-20 0.120185447177140002E-17 0.429046681675597000E-18 -0.176084645720124014E-17 0.144825178137232999E-17 0.314218888807008010E-17 0.734785045431920036E-18 -0.117162594263929007E-17 0.608863462349075997E-17 -0.662627775282060006E-19 -0.127825823877199996E-17 -0.171021320481935997E-18 0.496852428684837022E-19 -0.322174981279925987E-17 0.429050616589522023E-18 0.104417858731019005E-17 0.163454775014506994E-17 0.178823488869088012E-17 0.581478820662879978E-19 0.120185445884671008E-17 -0.129816506770870004E-18 -0.127996804138450008E-17 -0.154357189784606990E-19 0.214940359022386997E-17 -0.331603456810303990E-17 0.189241510509073018E-17 0.197721030105515002E-18 -0.416669355256307981E-17 -0.851010143081933984E-19 -0.125601149543838993E-17 -0.238461833303634009E-17 -0.169618840330709007E-17 -0.169618840330709007E-17 
-0.238461833303634009E-17 0.400549807943398004E-18 -0.271531580761243988E-17 0.199875144619239998E-17 0.743049289256009004E-18 -0.307195681303235978E-18 -0.282936086004326019E-17 -0.136888465210175001E-17 0.725986645135740000E-18 -0.163159410949012006E-17 0.172113641403015996E-17 0.328205462052660005E-19 -0.427613578091477998E-17 -0.808777579770756965E-20 0.234837217968203999E-17 -0.253264027376469004E-19 0.574118313848444962E-18 -0.393557272547890009E-18 -0.326626289571594991E-17 -0.775608933612533044E-18 0.105058856049080991E-17 -0.972396842945079028E-18 0.134638939547039006E-17 -0.102121372777366003E-18 -0.136083563029536003E-17 0.281128595079716003E-17 0.405831033256048975E-19 0.242827227208932005E-19 -0.170833281491397008E-17 -0.127819918718633995E-17 0.635195301101919971E-18 0.626566152951766979E-18 -0.199179797546848994E-17 0.132218081797795004E-18 -0.149746601790291009E-17 0.212336552384022990E-19 -0.292432710560827011E-17 0.729454186263562966E-18 -0.214066401594843015E-17 0.303724839061711014E-17 -0.113084192414368992E-17 -0.340000958562557999E-20 -0.173530847038446989E-17 0.626566152951766979E-18 -0.199179797546848994E-17 0.132218081797795004E-18 -0.149746601790291009E-17 -0.127819918718633995E-17 0.635195301101919971E-18 -0.108475496177924000E-19 -0.173530847038446989E-17 0.407397618620635981E-18 0.303835815115187008E-17 -0.182128580743015985E-17 0.358929916256484033E-17 -0.155546948389484994E-17 0.165743789754505008E-17 -0.733902645660011989E-18 -0.390963850189295982E-18 -0.130426075704324999E-17 -0.336457008402797007E-17 -0.323961040779898006E-17 0.134737673440338004E-17 0.209549852390063008E-18 0.950982283458043069E-18 0.311317695677142983E-17 -0.147193675656687989E-18 -0.592093288710995990E-17 0.271635327503675007E-17 0.162966204193836999E-17 -0.257298989513354013E-17 0.145506049662061992E-17 -0.187935630575551008E-17 -0.341302337267962986E-19 -0.249774864015655006E-18 -0.557204531224242037E-17 -0.140921796185665985E-19 -0.191936809076605013E-17 
0.109847716108033996E-17 0.307745161082448993E-17 -0.606673286482521039E-18 -0.725513167447356014E-18 0.223354010499117018E-17 0.660402286256383978E-18 0.261454292557767008E-17 -0.573978120970277956E-18 0.106320334455071992E-17 -0.381807789389963005E-18 -0.201346560792103982E-17 -0.501719392383432000E-18 0.170682457038868001E-18 0.276999327163805993E-17 0.143519550603408003E-17 0.166110181328899991E-17 0.857665614334221004E-19 0.132134526597416992E-18 -0.163342014702957008E-17 -0.159162656391022990E-18 0.333481192424111018E-18 0.110073055806264995E-17 -0.165744961785494992E-17 -0.723664504377990964E-18 -0.373996400265941977E-18 -0.369427622167534017E-19 0.137017753030935008E-17 0.219171077410990982E-17 -0.120449150112210007E-17 -0.380275891434780971E-17 0.355095637040657016E-17 -0.545627094050522035E-18 0.120185446207787991E-17 0.297192801651785003E-17 0.587392766169290955E-18 0.135525271560688006E-18 -0.279859685772820986E-17 -0.256481576428601989E-17 0.100246349307546002E-17 -0.813151629364128037E-18 -0.449943901581484032E-17 -0.105709711817336990E-17 0.673391193067168962E-18 0.214299335655337986E-17 -0.399799551104030001E-18 -0.399799551104030001E-18 0.214299335655337986E-17 0.815057453495450983E-18 0.164519606391896997E-18 -0.132860922807655002E-17 0.338956115711568991E-17 0.276652871974074985E-17 0.209062470706900983E-17 0.640476975732763991E-19 0.210311470833367019E-17 0.292498933315886017E-18 -0.266081026581987999E-18 -0.339530842093640994E-18 0.891473360161341033E-18 0.127055950214517004E-17 0.864474929349381958E-18 -0.173876150030865007E-18 -0.268677531257492988E-17 0.262439820419368005E-17 -0.394640487662082006E-17 0.239815370840882017E-17 -0.692232213035683983E-18 0.895380620082258923E-18 0.271301013410857011E-17 -0.270990856870302012E-18 -0.462501914737739018E-18 -0.282121724895181004E-17 -0.224758925933616008E-17 -0.213500811600352005E-17 -0.540097564326268985E-19 -0.571747239396653018E-18 -0.201000918383445008E-17 -0.203393786459438984E-17 
0.320178454062126010E-18 -0.899548989984066960E-18 -0.643639160794860989E-18 -0.373628383703464999E-18 0.290045029488884995E-17 0.443262929210015969E-17 -0.124590605607226994E-17 0.105385457017215992E-17 0.250840866395480999E-17 -0.133195930955738998E-17 0.189142457121884996E-17 -0.166018457661843011E-18 0.441388868814215984E-17 0.256481576428601989E-17 -0.336229728412594982E-17 0.516690097825123026E-19 0.324032453972133018E-17 0.344954167769314006E-18 -0.224103742019807013E-17 -0.501443504774545993E-18 -0.975781955236953991E-18 0.473322010925703029E-17 -0.283733538017529017E-17 -0.116551733542191996E-17 0.253093444639585018E-17 0.203287907341032009E-18 -0.279870273684662012E-17 -0.303322498411764990E-17 -0.289685267960971005E-18 0.198205709657505990E-18 -0.884990779140575957E-18 -0.239498565836152988E-18 -0.148604651406377009E-17 -0.211101786279453002E-17 -0.360401931144864038E-17 -0.530348504099598992E-18 0.888749319906575016E-18 -0.337449898656761984E-17 0.172225982869958989E-19 0.542094468797851985E-18 0.101984131697427992E-17 0.152297909443849011E-18 0.134546096510577998E-18 0.167670171908989002E-17 -0.190413006542766986E-17 0.281780730027413984E-17 -0.127285229170680006E-17 -0.171465938303872008E-17 0.128071381624849999E-17 -0.709098925749857965E-18 0.231568881611974999E-17 0.245639554703747008E-19 -0.577358832672211987E-18 0.380971266001097985E-18 0.689428570783346966E-18 -0.300781399570002012E-17 -0.515313669285834969E-18 -0.334866534763107014E-17 -0.885606033578872933E-18 -0.755712621218751975E-18 0.215419523463735996E-17 0.672544160119913988E-18 -0.336399135002044994E-17 -0.127245524501276999E-17 0.482123254002297968E-18 0.163985578588433008E-17 -0.132475952950572997E-17 -0.288668828424266018E-17 -0.171100655345368996E-17 0.948676900924816019E-19 -0.311083437790982016E-17 -0.542101086242751976E-19 -0.274438674910393021E-18 -0.137908147745226995E-17 -0.582758667710959018E-18 -0.948676900924816019E-19 0.260526158751741003E-17 0.896160858195049951E-18 
0.346055310600744018E-17 0.406628754241267990E-18 -0.158687652201152991E-17 0.481114714040442978E-18 -0.347283508374262985E-17 0.670850094225405965E-18 -0.124582664673347005E-17 -0.243945488809237997E-17 0.605120337518472008E-17 0.677626357803439982E-18 0.306668278553419014E-17 0.286720652645581002E-18 -0.311708124589583006E-18 -0.779270311473956023E-18 -0.112231865511195007E-17 0.422245924206269008E-18 0.785411300341549989E-18 -0.177876918923402994E-17 -0.271008191474013008E-17 -0.191577676845240994E-17 0.192117660349115996E-17 -0.172907527995913996E-18 -0.196332952071170988E-17 -0.295853553788778990E-17 0.297413657783084012E-17 0.238766345557921019E-17 -0.258331152278270982E-17 0.434993770062446016E-17 -0.391244518336761018E-17 0.249172443099960983E-18 0.295345913046855004E-17 0.180478236513759987E-17 -0.333194968181260020E-18 -0.398801640413046019E-17 0.127766979159431005E-17 0.406976831843030025E-17 0.125052834133521992E-17 0.584942424528099012E-18 0.105310018145351002E-17 -0.248180653545509999E-18 -0.766564817265142048E-19 -0.351391618168446033E-17 0.116087933372732994E-17 -0.435374934888709989E-18 -0.120702194983737991E-17 -0.184250841851491989E-17 0.169237182861408996E-17 -0.340337838206777992E-17 -0.248543289526053016E-17 0.456352235223054964E-18 0.241855596312107998E-17 -0.178644542531852011E-17 -0.843300708330460982E-18 -0.535150122119347000E-17 0.259308548890062985E-17 -0.842797782518028958E-19 0.840044925439451976E-18 0.223701401369860989E-17 0.350290475337015982E-17 0.140776875833665005E-17 -0.137781820271733003E-17 0.193970544921234994E-18 -0.692872950854017966E-18 0.230816478126796993E-19 0.914583824797830984E-18 -0.332036915323686022E-18 -0.124683249835832991E-17 0.221583819001725007E-17 -0.271333769763113996E-17 -0.102999206386122998E-17 -0.175844039849992982E-17 -0.120617491689012002E-17 -0.683979104907848005E-18 -0.119558700504943992E-17 0.169406589450860008E-19 -0.143995601033231009E-18 -0.298933170139291000E-17 -0.111914228155973999E-18 
0.259477955479513999E-17 -0.505043394800376962E-18 -0.261627301583172017E-17 -0.559571140779871969E-18 -0.249122977699329985E-17 -0.244248420444131008E-17 0.763575231435010022E-18 -0.498453660450472978E-18 0.131165285269981993E-17 -0.409226721750142997E-18 -0.166919670939219005E-18 0.356664398265104977E-17 0.123031535588687009E-17 0.140753053032022997E-18 -0.174446435487023001E-17 -0.348347593514250969E-17 0.608010837450976981E-19 -0.288229430082877002E-18 -0.270444385168497019E-17 -0.511978477056020969E-18 -0.180010383159299994E-17 -0.519787062038522031E-18 -0.778333943020545996E-18 0.223510818956729003E-17 -0.461209439779967016E-18 -0.254326936369024016E-17 -0.754514450101469011E-18 -0.729630168938883045E-18 0.215419524756206011E-17 0.423516473627149977E-18 0.118034041199887003E-17 0.526282745952778004E-17 0.189749380216831006E-17 0.362191288245938979E-17 -0.373710936328597001E-17 0.325260651745650993E-18 0.315943289325853988E-17 -0.894466792300540965E-18 -0.152582397536022009E-17 -0.189735380184963011E-18 0.109436656785256008E-17 -0.137907918816530007E-17 0.548877349820787005E-18 0.989334482393022989E-18 0.359374903696317968E-17 0.372694496791892029E-19 0.346055310600744018E-17 0.325207712186447985E-18 -0.166962105304642995E-17 -0.189715527850262003E-18 0.189418900882599987E-17 -0.165532075461661994E-17 0.311165494107747979E-18 -0.215505034116195009E-17 0.118796370852416008E-17 0.601181634313739955E-18 -0.347389387492670004E-17 -0.208708918203459989E-17 0.337288519596662010E-17 0.931736241979729988E-18 -0.145689666927739995E-18 0.227220724004029024E-18 -0.162143943672644003E-17 0.174742897018562006E-17 0.323481882556416991E-17 -0.635274710440724965E-18 0.375235595633655002E-18 -0.145080924035446995E-17 0.143389882520053003E-17 -0.235633978014306013E-18 -0.112496563307211995E-17 0.293844332080887002E-18 -0.161068608876326000E-19 -0.307600014795399014E-17 0.258090939028385005E-17 -0.476763082273924010E-17 -0.228127148519264015E-17 -0.309844652105622987E-17 
0.174912303608012996E-18 0.531513174402072981E-18 -0.382435375685316988E-18 0.900819539404948025E-18 0.326277091282355990E-17 -0.264724265796570986E-18 -0.879365783037773085E-18 -0.260547334575423015E-17 -0.143360326322790005E-17 -0.504831636563563002E-18 0.179242759550851016E-17 0.298155597433513989E-17 0.101982766849417992E-17 -0.297393267780985003E-17 0.257498015965306995E-17 0.151788304147971006E-17 -0.326954717640159979E-18 0.230392961653170000E-17 0.139591029707509004E-17 -0.742847894742021012E-18 0.256650983018053003E-17 -0.153143556863578002E-17 0.409540429997454021E-18 -0.142725051612349992E-18 -0.443845264361252992E-18 0.113671821521527009E-17 0.263935466364439986E-17 0.180841534238793007E-17 -0.220183567660795011E-17 -0.102829799796672003E-17 0.334027442749732992E-17 -0.454009659728304982E-18 -0.325260651745650984E-17 -0.522448571488674997E-18 -0.336345389745121001E-17 -0.457041606071333975E-17 -0.304355831294945982E-17 0.427237952841651978E-17 -0.186941463343081017E-17 -0.129563254076186006E-17 -0.112984180780269004E-17 0.463510525760183965E-18 0.543403608029869993E-17 0.628157090539567034E-17 0.107863850412424995E-18 -0.496782957561507984E-18 0.143475058212455998E-17 -0.157547458689992002E-17 -0.229157071099953982E-17 -0.353871744751775981E-17 -0.655427268083501956E-18 -0.373057479213008010E-18 -0.288389363397911984E-17 0.267562859761936013E-18 0.221342377338269010E-17 0.759040992641635024E-19 -0.427051187586338014E-18 -0.246546273902076005E-17 0.311034949497450000E-17 -0.250275745587688010E-17 0.149384832852448007E-17 0.785170068645730031E-18 -0.124784693198205006E-18 -0.251252152874503999E-19 0.306800057149170005E-17 0.512280225515675992E-18 0.352936752798798985E-18 -0.468641674547420009E-18 0.342326765393769015E-17 -0.232885905658585013E-17 0.440296799120129026E-17 -0.682831107464595018E-18 -0.159760541806344992E-17 -0.278081237108601981E-17 0.216042711637734983E-17 0.166110979752061002E-17 0.117815888393792008E-17 -0.169403964681784996E-18 
0.249326037070838015E-17 -0.597174026640863979E-18 -0.197246651090594008E-18 -0.770422236383328028E-18 -0.405218297824251983E-18 -0.384955849310639988E-19 -0.747681707350008986E-18 -0.163334696595593989E-19 0.145763242918459997E-17 0.291859303545338004E-17 -0.294086889444445013E-17 0.108848259833857008E-17 0.139201626084264003E-17 0.337320625108601995E-20 0.120185558006418003E-17 0.379317687846262973E-17 -0.300611688448085986E-17 -0.418480598057927000E-18 -0.261770197033990986E-18 -0.125675852095236000E-16 0.569849893866608996E-20 -0.467203818658997981E-18 -0.969974113361066045E-18 -0.308892401784452019E-18 -0.128391665958031008E-18 -0.149140333571066003E-17 0.168559556503606008E-18 0.264274279543341995E-18 -0.103443898683430996E-18 -0.166316242682362005E-18 0.493529040673638011E-19 0.366553507924297990E-18 -0.965617559869902025E-19 -0.628914596258986970E-18 0.680130388614003019E-19 0.250558797806600002E-18 -0.222187329976643995E-18 0.573591561356962000E-19 0.337396632102723016E-19 -0.405891570879359990E-17 -0.890046339107058019E-19 0.484291087592646026E-18 0.594828887209332028E-18 -0.110802497412702995E-18 -0.409245953699384999E-18 0.386593778060743030E-17 -0.846764863036188956E-19 0.417633151520367001E-19 0.269234881676822002E-19 -0.653364819565015025E-17 -0.108858622973204005E-18 -0.264863232139479010E-19 -0.124010917433949999E-19 0.374812079160027996E-18 0.271262301358190021E-18 0.288414718540088996E-17 -0.396919639083364985E-17 0.240811466904398010E-17 -0.235051642863068013E-19 -0.148738985537855005E-17 -0.355753837846806022E-19 0.407761660808220029E-17 0.397766672030618977E-17 0.493311988480904980E-17 0.739565642071410984E-19 0.343477958748839018E-14 -0.366744259744019982E-14 0.123993155153046002E-13 -0.668060306510329026E-14 0.135525271560687994E-19 0.152465930505773989E-18 0.242251422914729993E-18 -0.542312844479565985E-18 0.462585868319255004E-17 -0.919879898349006038E-19 0.210064170919066003E-18 -0.643745039913268017E-19 0.125157058890702999E-16 
-0.582864546829365060E-19 0.246041564481448014E-18 0.101296537813243998E-18 -0.238514220907215010E-17 -0.797809496952801045E-19 -0.192534559377843010E-18 -0.187360213774079013E-17 -0.389635155736977975E-19 -0.200746808499269012E-18 -0.245893664587923003E-17 -0.948676900924816019E-19 0.000000000000000000E+00 -0.108420217248549998E-18 0.261375838676955993E-18 0.725966792801038968E-19 -0.167646349107347008E-18 -0.805740091075652957E-19 -0.511324047944521005E-17 -0.804386616798363005E-20 0.803374354523752020E-19 0.233691757936031022E-18 0.322467056021906979E-19 0.692362580416071978E-19 0.782403671634307027E-17 0.714617874796812030E-19 -0.205034912794744004E-18 -0.370259277068535988E-18 -0.219593291575676978E-18 -0.226587930835426006E-18 0.589328467008100031E-17 -0.458175341293122029E-19 -0.555484868553859964E-19 0.307568912804366982E-18 0.664220076179670995E-17 -0.838628792230762059E-19 0.133440776417054003E-19 0.513182852027898008E-19 0.130496013436366007E-19 -0.467879824239593957E-18 0.114281685243550001E-16 -0.521729943861285986E-17 0.141818726358787999E-16 -0.217687467444354995E-18 0.338813178901719991E-18 0.218534500391609006E-18 -0.564123942871364026E-17 0.155854062294790998E-18 0.738612730005749992E-18 0.132137139771670997E-18 0.652215369385811020E-19 0.528548559086683026E-18 0.393023287525995020E-18 0.813151629364127964E-19 0.287313575708658983E-17 0.687790753170491972E-18 0.106387338175140007E-17 -0.813151629364127964E-19 -0.669156028330897037E-17 -0.169406589450859993E-20 -0.383705925106198005E-18 0.159877468794249008E-19 0.637946834691517017E-17 -0.251992301808153990E-19 0.130597260343342002E-17 -0.397914845927722009E-19 -0.771823101021564955E-19 -0.678629956492047021E-19 0.244403275117150019E-18 0.616306942007001003E-19 0.578090437734863976E-17 0.519520791676689028E-19 0.250034502269737982E-18 0.145212009554413988E-18 -0.175005708853890006E-18 0.394009615144800003E-18 -0.110346462401244998E-18 0.222948572015913991E-18 0.171997642246803001E-18 
0.319375928924822986E-20 0.101563716873418997E-18 0.901803069674074007E-19 -0.274728422129961016E-17 -0.421283293404232023E-21 0.608978550920113979E-19 0.763010629372050983E-18 -0.286952614910220011E-18 0.730908685021870015E-19 -0.451919320602110971E-17 0.589814511873686954E-22 0.126157748391557989E-18 0.141953434272505006E-18 0.100040548709848005E-17 0.102890605326513008E-20 -0.935924522377375949E-19 0.847890824022396966E-19 -0.879092467376866968E-17 0.476821187834545992E-19 -0.753033951901363993E-19 0.168555449681110991E-19 -0.100415019031744998E-16 0.329480295536328014E-20 -0.430020650141573979E-19 -0.930238733580653041E-19 0.843287605802546953E-19 -0.745054355771767074E-17 -0.127373076708930990E-18 0.142668167903681001E-19 -0.212738678080100992E-18 -0.124931763730073991E-18 0.141197446866016998E-17 -0.623935018560200991E-21 -0.208825078665083006E-19 -0.702882739988749020E-17 0.135013553282889001E-18 -0.273662399296958983E-18 0.979870564908593936E-17 -0.967980574380733076E-17 -0.586955097351761970E-17 0.928385604696770927E-21 0.588030583085907051E-19 -0.303121635649031013E-20 -0.785213004720344977E-17 -0.143126772192240001E-21 0.442508069709277023E-17 0.348261833625540021E-18 0.403209644538310013E-17 0.158323610013567989E-18 -0.736025309049684025E-19 0.252063439340834008E-18 0.327873715300706015E-21 0.308873893618246024E-19 -0.782545946699665940E-19 -0.916516118708754965E-20 -0.330581739190081984E-17 0.194817577868489006E-18 -0.215146368602591997E-18 0.701978555037001045E-19 -0.378514539581815021E-17 0.926442286059390996E-20 0.154159996400282999E-18 0.327166475876973987E-18 0.149444776103045990E-18 -0.384315608916437980E-19 0.541531313897068035E-18 0.866120139888960971E-19 0.110405247154699008E-18 0.129192718002985003E-17 -0.571019320457605944E-19 0.236706004088174008E-19 -0.840441972133477969E-17 -0.208661272600177011E-17 -0.128778654135807999E-16 -0.241127284462270001E-19 -0.124068820076829006E-18 -0.257368975789749001E-19 0.361217365792718986E-18 
0.122756911625320001E-18 -0.415100531279882990E-17 -0.172979513388722001E-17 0.192235450868344009E-17 -0.817419881324901948E-19 -0.542101086242751976E-19 -0.247333620598255988E-18 0.251695840276614999E-17 -0.439123055680311038E-17 0.812389299711598993E-17 0.749624158320056041E-19 -0.872105122493027937E-17 -0.372694496791892029E-19 0.467901000063275970E-17 0.399121924746226012E-17 0.660008072500550978E-17 -0.527542707461818990E-19 0.343843876982052995E-14 -0.367179973492087989E-14 0.124130984354223001E-13 -0.668730478978197023E-14 0.811118750290718005E-17 0.122819777351874007E-18 0.342201310690737000E-18 -0.599540507978433977E-18 -0.469610947825544972E-17 -0.214310597913247990E-19 0.372694496791892029E-19 -0.345589442479755020E-18 0.437836624391667992E-18 0.205643717725583008E-18 -0.312684304341145982E-18 -0.472009937038071041E-21 0.120475133905591992E-18 -0.260656398339187997E-18 0.673638442522140059E-19 -0.964679226862538027E-19 -0.287342692466221023E-18 -0.221571907600904013E-18 -0.434358495352004983E-17 -0.228529489169210000E-17 0.277911509994136018E-17 0.156800809279949005E-22 -0.476438579319618995E-17 0.793257935632228010E-19 -0.191429446079471997E-18 -0.792505201274804995E-19 -0.761249535594635047E-19 -0.759284593898908991E-19 0.245210185927285981E-17 0.326322751652168995E-21 -0.219505881849384995E-18 -0.351708553712615972E-17 -0.957527733479134017E-19 0.210332177437534002E-18 0.632850078629209974E-17 0.783703999557241003E-17 0.712107216201591027E-17 0.931799314501438023E-19 -0.162733723449395010E-18 0.273630519451929002E-18 0.405238470426719007E-18 0.290400572395675995E-18 0.309034676849811019E-19 -0.982194259345465061E-17 -0.459295178406396010E-17 0.378501304692013982E-19 0.660156303266319966E-19 -0.393711501795638995E-18 0.111130722679763993E-17 -0.731836466427715011E-18 0.133492392487278003E-17 0.170253622398114007E-18 0.678102813836270969E-17 0.116382328043262001E-19 0.652850644096251986E-18 0.135101755087061000E-18 0.135525271560687994E-19 
0.406575814682063982E-19 -0.688214269644119025E-20 -0.338813178901719985E-20 0.291379333855478980E-18 -0.673391193067169010E-19 0.575558887659297003E-17 -0.529395592033938013E-20 -0.101643953670516005E-18 0.225310763969643986E-18 -0.860585474410368990E-17 -0.355753837846806022E-19 0.111384832563939989E-18 -0.619022265765282981E-18 0.659097512082252000E-20 0.367135843075535984E-19 -0.148258098918869009E-16 -0.403656384107634971E-20 0.301976499779261008E-19 0.189408714322993997E-20 0.272869291952943018E-18 -0.557349643847283955E-19 0.426147018319669965E-17 0.254580724327268992E-21 -0.869775563299138967E-19 -0.110887275953398993E-18 -0.998341549897226014E-19 -0.481248056751729986E-20 -0.185349767128022002E-18 -0.145910104401558996E-18 -0.131668031408930009E-17 -0.607341738615819030E-21 -0.624901402953516003E-17 -0.116001184478416006E-20 -0.833449305952115017E-20 -0.220151721333313981E-21 0.165335936503532999E-18 -0.129960524270914003E-18 -0.396916978071411982E-18 0.219508923152721984E-18 -0.827237810600491973E-17 0.341451599951967009E-18 0.159674085227868006E-18 0.188315067856254009E-20 -0.847491822467939984E-19 -0.455167206907106011E-19 -0.209933309261154010E-17 -0.145404646964087995E-21 0.168411398277699010E-18 -0.313218061614544984E-18 0.560594904015392966E-19 0.153058648530649989E-18 -0.122794367412262001E-17 0.111914706524383009E-20 0.171517425742714004E-18 -0.183906875580670001E-18 0.219862189175055999E-18 0.897452583439613029E-17 -0.352102288514519020E-19 0.153068958878854008E-18 0.146666444626402991E-18 -0.303260900504276985E-19 0.235125754644082987E-18 0.538382462411191964E-21 0.130959794039688006E-19 0.881466470414765984E-17 -0.176754757224459993E-18 -0.338370818476285984E-19 0.458932469699724984E-17 -0.582300740523848969E-17 0.582615730901109008E-17 -0.949456505418176036E-21 0.113959685864603008E-18 0.144076135613940989E-19 0.582626318812949995E-17 -0.116571720284226000E-20 -0.141483631752557998E-16 0.100448206267148993E-17 -0.113650645697845998E-17 
-0.471162076910205015E-20 0.673391193067169010E-19 0.104026233834669001E-18 0.535324822664718007E-18 0.223616698075135001E-18 0.861263100768173047E-17 0.000000000000000000E+00 0.243945488809238016E-18 -0.174488787134385990E-18 0.271050543121375988E-19 0.487890977618476995E-18 -0.125527106409534999E-16 0.100472472860477996E-18 -0.955823741417275020E-19 0.107162902717469998E-18 -0.101127663721311999E-18 0.305449075076592991E-19 -0.416346654315768998E-17 -0.370398036616291987E-19 0.639750339166004974E-19 -0.124552860677330003E-16 0.295344837712059013E-19 0.438212164389766970E-18 0.101705744062273998E-16 -0.262887759400580013E-17 0.254481784579693981E-17 0.126166498411475007E-18 -0.267108613912254992E-18 -0.118145276312759989E-18 0.181096822855342012E-18 0.187530220069472992E-18 -0.423863827445876037E-17 -0.640878176666766969E-17 -0.881828403118920017E-17 -0.228818836947481010E-19 0.146804706393460995E-18 -0.227785688362402982E-18 0.113841228110978004E-17 0.179570984817912009E-18 -0.121972744404618998E-17 0.237846851589007997E-17 -0.233781093442186990E-18 -0.337119113007212016E-18 -0.477943634444158998E-17 0.130072496962737990E-18 0.000000000000000000E+00 -0.406575814682063982E-19 -0.595040645446145988E-19 0.148442524006315990E-18 0.293073399749988014E-18 0.321449003483006979E-18 -0.327462937408513012E-17 -0.140607469244213991E-17 0.440457132572235984E-18 -0.105549829437158004E-18 -0.242918130488448022E-18 -0.188834994088198992E-18 -0.239202104304613992E-17 0.119405175783255012E-18 0.120003227366130008E-18 -0.403498702803366996E-18 0.243945488809237997E-17 -0.745388993583784058E-19 -0.514996031930614991E-17 -0.502876181595488006E-19 0.874958564734091035E-19 -0.126038502551440006E-17 0.198205709657505990E-18 0.262368455412020012E-18 -0.435120825004534007E-17 -0.847032947254300038E-20 -0.933853824347866003E-19 -0.218110983917982006E-19 -0.158951026508189991E-18 -0.123216824045899002E-19 0.105479069047138999E-16 -0.221602720078721986E-19 -0.112094436116716006E-16 
0.552668609373801021E-20 0.600580946092660953E-19 -0.150321091163205985E-19 -0.566633402054696966E-18 0.471817359051710964E-20 -0.309740174670613979E-18 -0.713982335130080985E-19 0.717113685500036983E-17 -0.388401777744873018E-18 -0.159250724383875008E-18 0.102895322840944002E-18 0.202123237038557009E-18 0.107599654080898005E-19 -0.115560962453870006E-16 0.115264413034391004E-18 -0.323460706732736002E-19 0.217151454407421018E-18 -0.162863590805566010E-19 -0.447140234933781975E-19 -0.458895903494596031E-17 -0.264017439963143991E-19 0.132772414482112011E-18 -0.946559318556680990E-19 -0.602663941971434999E-17 0.533630756770209021E-18 -0.107064964532944006E-17 -0.152465930505774001E-19 0.198642461020933989E-18 0.271326821445969012E-18 0.764647908913610958E-17 -0.285476573004300971E-19 -0.423615735300657014E-18 0.405577242246589994E-17 -0.569841415265331035E-18 -0.202440874393777998E-18 -0.138405183581353003E-17 0.741153828847513037E-19 0.440457132572235996E-19 -0.145689666927739995E-18 -0.201593841446523018E-17 0.118584612615602002E-19 0.100288700954909006E-17 -0.389635155736978012E-18 0.253678923077150013E-17 0.463253320355523965E-17 -0.542861329849173998E-18 -0.578182497767161953E-18 0.378034402595259001E-17 -0.390167653256308987E-19 -0.809119123877932027E-18 -0.637257875959320984E-17 -0.796210970419041958E-19 0.108526096366957005E-19 -0.806968288849171933E-17 0.477726582251425006E-18 -0.325260651745650993E-18 -0.114409418473740993E-21 -0.136584062744755990E-19 0.204770214998727011E-18 -0.769571784227895035E-17 0.308108234563751989E-19 0.136366586621958005E-17 0.456872337957895004E-19 0.560083992759655039E-19 0.178381499097059997E-19 0.290512448573523984E-18 0.169088952095639994E-18 0.173985861321953990E-18 -0.136689941863163006E-18 -0.742890246389383958E-17 0.225469582647254023E-18 0.414781446358590005E-18 0.579688173277162020E-20 0.298155597433514008E-18 0.166018457661843011E-18 -0.339766090967380983E-18 0.405517023497995986E-19 0.152465930505773989E-18 
0.265968345437850018E-18 -0.671697127172659976E-18 -0.315784470648243990E-18 -0.822315467062235941E-17 -0.257815653320528003E-18 0.697955148537542998E-18 -0.406575814682063982E-19 -0.247333620598255988E-18 0.582758667710959018E-18 -0.727897763222982996E-17 0.448338738054221020E-20 -0.542101086242751976E-19 0.189735380184963011E-18 -0.758941520739853008E-18 0.460785923306338999E-18 0.111490711682346993E-18 -0.542101086242751986E-17 0.787740640946499026E-18 0.358824332280602981E-18 0.387587718284786979E-17 0.202017357920150992E-18 -0.887267012248879966E-19 -0.146410968421885989E-19 -0.105032085459533001E-18 0.372694496791892029E-19 -0.189735380184963011E-18 -0.355922582691767008E-18 -0.767073037033493925E-17 -0.682708555486965996E-17 0.203287907341032009E-18 0.345589442479755020E-18 0.662040951573960991E-17 -0.326107684692905992E-19 0.393023287525995020E-18 0.105709711817336990E-17 0.792822838630025021E-18 -0.203287907341031991E-19 -0.542101086242751976E-19 0.542101086242752024E-18 0.474338450462407997E-18 -0.137219337455196992E-18 -0.847032947254299978E-19 0.326173859141910017E-18 0.731219906678634021E-19 -0.946472464592363042E-19 0.139237548491848992E-18 0.612680171563180001E-19 -0.188808491239547999E-17 0.145513853081274004E-20 0.351754011649361006E-19 0.103319340142544008E-16 0.706026828653623967E-19 -0.227702440388566992E-18 -0.768408726927613926E-17 -0.144477278643678006E-18 0.338346241908282006E-18 0.257484777844331994E-18 0.254997244763346007E-17 0.210498854330963010E-19 -0.183381484398103994E-18 -0.755890011070466999E-19 0.802413433023315002E-17 0.699252607177727046E-19 0.213594500225259994E-18 -0.235617227606901007E-21 0.130534219874883997E-16 -0.939645122611503009E-19 0.181688567186046989E-18 0.550571415715294980E-19 0.367633474932048013E-17 0.264951905901144992E-17 -0.711507675693612044E-19 -0.796210970419041958E-19 -0.555997720533642962E-19 0.694178241860625994E-19 -0.971685756843591012E-18 0.352246592049581004E-19 -0.177016651086348013E-19 
-0.809573907778713970E-17 -0.165171424714588999E-18 0.330342849429177012E-19 -0.540915240116595965E-17 -0.352026892878886990E-17 -0.329326409892472010E-17 0.622238343986890020E-19 -0.660685698858354024E-19 -0.982558218814988032E-19 0.528548559086683026E-17 -0.313402190484091005E-19 -0.866032624180153034E-17 0.952700100629121048E-19 0.000000000000000000E+00 0.121972744404619008E-18 0.146367293285543002E-17 -0.155774652955986003E-18 -0.406575814682063982E-19 0.127393755267047002E-17 0.142301535138722000E-18 0.508219768352579978E-20 -0.738612730005749992E-18 -0.115196480826584991E-18 -0.121972744404619008E-18 0.894678550537355034E-20 -0.120066920273297001E-18 0.246976278573632984E-18 -0.799387343971245945E-19 -0.190053017540184000E-18 0.346884975340074986E-19 -0.175978087053195992E-19 -0.495753329340794005E-18 -0.285070427323536989E-18 -0.441968288153793989E-19 0.812601554256781032E-21 -0.748406217586133030E-17 -0.742510405052100003E-19 0.347283508374263019E-19 0.168771314740419006E-18 -0.142883870289960005E-18 0.396517298433418985E-19 0.398084309385840033E-17 -0.227640104574593005E-19 -0.514307817660970006E-19 0.349851076995627978E-18 0.116316152503736994E-16 -0.578894079889111008E-19 0.347336447933467014E-18 0.295353109518184001E-19 -0.312025761944802984E-18 0.281532575843647979E-18 0.110855436971906990E-18 0.133195930955738993E-18 -0.630616029230826988E-17 -0.385230584411256025E-17 0.125369147999762005E-18 -0.152512769607959997E-18 0.905901737088474059E-17 -0.838562617781756975E-19 -0.264274279543341995E-18 -0.777020380207811956E-19 0.542101086242751976E-19 -0.542101086242751976E-19 -0.542101086242751976E-19 0.406575814682063982E-19 0.840256683676266033E-18 0.551587855251999996E-17 0.158225754547102995E-17 0.362371282747230003E-19 -0.216840434497100983E-18 -0.745388993583784058E-19 0.400138364282931018E-17 0.162247341249214004E-19 0.161979169294623999E-17 -0.595570041038180046E-19 0.894466792300540965E-18 0.284603070277445010E-18 0.502798757490152989E-17 
-0.613933450636857020E-19 -0.294972606436410006E-18 0.599699326656045025E-17 0.148230765769502993E-18 0.350671640163280003E-18 0.628498446862691006E-17 0.169406589450860008E-19 0.381164826264434972E-19 -0.220228566286117998E-19 -0.199529198637591003E-18 0.108631975485363994E-18 -0.129066645337873990E-18 0.144456340634422998E-18 0.530040792911729013E-19 0.403618230401881005E-18 0.737014865216481058E-17 0.110059844318994989E-18 -0.454074179816084022E-18 0.352795095513764987E-17 0.327364999223985992E-19 0.128494236353987006E-18 -0.206400753422191982E-17 0.908972231522271051E-19 -0.196988099795828006E-18 -0.510866746312750012E-19 -0.832093403647102925E-17 0.508219768352579978E-20 -0.156595216123638992E-18 0.304349525860310983E-18 -0.762382592088074007E-17 0.127054942088145006E-19 -0.180629776001980005E-18 -0.293741761684931004E-18 0.590206271074140996E-17 -0.421489881126395011E-19 -0.346224717190195023E-18 -0.299214388617581980E-18 -0.100318347108062999E-16 0.568359107607636009E-18 0.107471540347625997E-16 -0.118584612615602002E-19 -0.106064406863998999E-18 -0.429267033245619011E-18 -0.243921335135351988E-17 -0.434104385467828984E-19 0.210838411972415998E-18 -0.518765659418141033E-17 -0.190582413132218010E-18 -0.912678000666508976E-19 0.540618778585057037E-17 0.694567016748526037E-19 0.345589442479755020E-18 -0.406575814682063982E-19 0.202271467804326998E-17 -0.711507675693612044E-19 0.118923425794503999E-17 -0.203287907341031991E-19 0.797629750605692964E-17 0.188464830764082006E-19 -0.451061588025165991E-18 -0.189821406968669007E-18 -0.400002748021503026E-17 0.124839752407727995E-18 0.327464209198704022E-18 0.545653123870147032E-17 -0.251365764192934006E-18 -0.179010689506355000E-18 -0.904977154343205932E-18 -0.183495918530850001E-18 -0.399801975895928995E-18 0.278820123303884006E-23 0.562694574772872009E-18 0.202758511748997993E-19 0.854656243779589042E-18 -0.390636044278167019E-19 -0.107188002479181997E-17 -0.245956339028960991E-20 -0.580863803722753026E-19 
-0.791746263062787051E-20 -0.574261868458814053E-19 0.236017789818529992E-18 0.373936922071946989E-18 0.518031784778684025E-18 0.245470727140724987E-17 -0.691522992094331016E-19 -0.285741270800317976E-18 0.316578564036294992E-19 0.110114283143058996E-18 -0.169406589450859996E-18 -0.754330485133237969E-17 0.942324153820408977E-20 0.281214938488428001E-18 -0.111808349037568006E-18 0.279944389067546021E-18 -0.269065309651248993E-19 0.764138861964645979E-17 -0.310440883891150985E-19 0.204959664400601989E-18 0.515554994382785973E-19 0.579622776531413976E-17 -0.229418597622752010E-19 0.920281259736054051E-17 -0.399130950750337972E-23 -0.120874248551148997E-18 -0.174419097167777988E-18 -0.868654828080977068E-18 -0.900415783177556089E-21 0.404465583235387984E-19 -0.681480957419390977E-17 0.149009478768038989E-19 0.338276207821614990E-18 -0.482449426342788030E-17 0.360144848996880021E-20 -0.141509621152472008E-18 -0.132682285823646993E-18 -0.142110385631281993E-16 0.189967376932361008E-22 0.110892271134246991E-18 0.633346199603779993E-19 -0.300611992980550998E-17 -0.269779993700494995E-18 0.391392087358040979E-18 0.589549975636383965E-19 0.134263796311126002E-16 -0.559058288800089044E-19 -0.227134697220322979E-18 0.286999495230071982E-17 -0.530824960132428959E-18 -0.419810704482912991E-18 -0.874646221334790992E-17 -0.519739416435238996E-17 -0.217856874033806009E-17 -0.364476457406177966E-23 0.131634213959238997E-18 -0.338019085513668988E-19 0.242463181151542991E-19 0.205405489709168013E-19 0.396478007354323017E-19 0.290554014399305015E-18 -0.223490056723352997E-17 -0.582914177666118979E-20 -0.155517709978212000E-20 0.518526149271776997E-17 0.184335545146216991E-18 0.465497544075441045E-18 0.198713929425859014E-17 0.592923063078010012E-20 -0.474073752666390980E-19 -0.119365471113852009E-18 -0.367304091612129012E-17 0.149091033606558009E-19 -0.402266203690663009E-19 -0.295418456786575992E-18 -0.928032954377330083E-18 0.123415347392912000E-19 -0.403637669146276001E-18 
-0.193017632855574003E-18 0.642437432800943969E-17 -0.115514118181804992E-18 0.261563774112127983E-17 -0.575982404132923990E-19 -0.514996031930614991E-18 -0.508219768352580011E-18 -0.113336184716178005E-16 -0.258810717670098996E-20 -0.318484388167617024E-18 0.813151629364127964E-19 0.115196480826585000E-17 -0.162630325872826002E-18 -0.178829830989063991E-18 -0.260208521396520987E-17 -0.135525271560687994E-19 0.731095312598867980E-19 -0.169050570915216994E-17 0.107679063419703003E-18 -0.465868120989865006E-19 0.981367078732912033E-20 0.423516473627149989E-19 -0.982558218814988032E-19 -0.182959116606928994E-18 -0.101988060805338004E-18 -0.209992702514142018E-17 0.590276085117841030E-19 0.271050543121375988E-19 0.162630325872826002E-18 -0.382181265801140005E-17 0.383811804224604982E-21 0.423516473627149977E-18 -0.149077798716756994E-17 -0.279182059415017007E-17 0.592923063078009997E-19 -0.338813178901719985E-20 0.308319992800564986E-18 0.150771864611265003E-18 -0.740624433255478979E-19 -0.387094056895215014E-18 0.676501392170368033E-19 -0.121549227930992002E-18 0.295402740354937003E-19 -0.291416832279092011E-17 -0.460872725571868987E-19 0.758375729200866953E-19 0.521371940092172967E-20 -0.406504324951388992E-19 -0.651424377925056947E-19 -0.108738516348261003E-17 -0.957544277091384987E-20 -0.357024387267687978E-18 -0.120808074102144995E-18 -0.257471546185705991E-18 0.344363560811950999E-18 0.515776973146925997E-17 0.290371414279082971E-19 0.268688528882230986E-18 0.191512370935880005E-18 0.454950577675083974E-17 0.264846688527228016E-18 -0.236094717615498001E-18 -0.111563503576251997E-18 0.117314063194721006E-18 -0.277085652870562998E-18 -0.285873619698326003E-19 0.390852765598656020E-18 0.429106891079029014E-17 -0.138235776991902008E-17 0.236143521271638010E-19 0.298686647386773004E-19 0.674238226014422974E-18 0.847032947254300038E-20 0.176182853028893989E-18 -0.202096767258955992E-19 -0.745388993583784058E-19 0.325260651745650993E-18 0.119262238973405006E-17 
0.307472959853311023E-18 -0.216840434497101002E-17 -0.154498809579183997E-17 -0.527193306371076993E-17 0.337489689921634990E-19 0.389635155736978012E-18 0.267662411332359003E-18 0.125360876193636007E-17 0.162317651601281012E-19 -0.846415539645091067E-17 0.115143541267381001E-20 -0.813151629364127964E-19 -0.243945488809238016E-18 -0.454009659728304982E-17 -0.104290931630686000E-19 0.338813178901719991E-18 0.623416249179164953E-18 0.313063377305188982E-17 0.220228566286117998E-19 0.609863722023096003E-19 0.155854062294790998E-18 -0.355753837846805998E-18 0.374388562686400991E-18 -0.416740210049116008E-18 -0.124831480601602994E-18 0.223034362923898006E-18 -0.571879588294660973E-19 -0.533562697900371999E-17 -0.143723355214125000E-18 0.107285742269608005E-18 -0.225737175575414976E-18 -0.854858075849051981E-19 0.293756650935957019E-19 -0.710708536503824959E-17 -0.797732982746140040E-20 0.214140516977727985E-19 -0.228315083954436005E-18 -0.165965518102638991E-18 -0.213240544471269990E-18 0.618450518525887022E-17 0.116043513773839002E-18 -0.139892785194967992E-18 -0.506313944221258028E-18 0.133937084784586000E-18 0.204346698525099987E-19 0.536807130322413020E-19 0.124523769413732997E-19 0.148952067263648992E-18 -0.301517259442928983E-18 -0.127393755267047002E-17 0.398105485209521015E-18 0.211419423634673018E-17 0.340168431617327016E-17 -0.430292737205185006E-18 -0.193123511973979996E-18 -0.107814853389060005E-16 0.631039545704453993E-19 0.230392961653169981E-18 -0.135525271560687994E-19 0.976205471710581020E-19 -0.194817577868488988E-19 -0.813151629364127964E-19 -0.117102304957906998E-18 -0.428768077900126986E-17 -0.606475590234079043E-18 0.101643953670515996E-19 -0.106005024343306003E-18 0.744917500634628952E-19 0.810409525633515039E-20 0.448588648865876997E-17 0.121760986167806011E-18 0.375955573638820974E-17 0.914795583034644005E-19 -0.891027874206852923E-17 -0.104166389249707997E-18 0.901362169886782956E-19 -0.633570718378866017E-19 0.112392829688519002E-18 
0.194071254160812988E-18 0.147344671802759991E-17 0.697367299791314956E-19 0.211231296913984995E-19 0.479296835202547044E-18 -0.566753257617398987E-19 0.323987061890757993E-18 0.259933235688662995E-19 0.986793383551260015E-19 -0.179676863936318002E-18 -0.919808297546715955E-19 -0.606703892083144040E-19 0.386989004957421020E-18 0.314651646799352012E-17 0.101994729949027002E-18 0.684812902965300976E-18 -0.806073444862511971E-18 0.110753693756562992E-18 -0.925118797079306001E-20 0.301637366067872000E-17 0.452103835596982987E-19 -0.753329927464293003E-19 -0.465974000108271974E-18 0.212139401639839982E-17 0.984675801183124024E-19 -0.342677766723567977E-18 -0.147701370177469013E-19 0.162418567636011994E-18 0.148230765769503011E-19 -0.840150804557859028E-19 0.448834817816173048E-18 -0.330239617288731015E-17 -0.107028899458235995E-19 -0.720116040930874954E-19 0.171179072713674006E-18 0.947754072384057926E-17 -0.104498004108727002E-16 -0.159287360851361995E-18 -0.546449624875564009E-18 -0.169016160201734991E-18 -0.141622419855816012E-18 0.105248409733328002E-16 -0.382470559941918998E-19 -0.297665499382925019E-18 0.962848986359687018E-18 -0.178557947061475997E-18 0.440432446400829996E-19 0.263750093250461983E-17 0.518265773643397017E-17 -0.455663906451196969E-17 -0.368948450555535963E-20 -0.373638438713888985E-18 0.213542219055679009E-18 -0.448714132358229986E-17 -0.219963878966174016E-20 0.576077695339490031E-17 0.158077523781333993E-18 -0.930746933967116952E-18 -0.950560029288081993E-19 0.423162853915283991E-19 0.142072256667234998E-18 0.287333593479481977E-19 0.170800388783012010E-19 -0.143679075460453001E-16 -0.306660014178995024E-19 -0.150184396739703993E-18 -0.853679844308174942E-19 -0.189294334590933006E-18 -0.272474283524034976E-18 0.125462270442160009E-18 -0.751674786718394965E-19 0.306550050694755012E-17 -0.309179488361727017E-21 -0.225680824731844995E-17 0.535668041239238007E-21 0.109157572264066003E-20 -0.602222147500702957E-21 -0.399748891870335016E-18 
0.651654608810402999E-19 -0.115821846718304011E-18 0.728333770199589983E-19 -0.105682434112270003E-16 0.785310085019301966E-19 -0.113602115086027998E-18 -0.212083351005076997E-19 -0.204710795319530007E-18 -0.245404952064859993E-18 -0.800663773371987988E-17 -0.216121373216464000E-19 -0.990499766618607977E-19 0.114815399364617001E-19 -0.876268280833858963E-19 -0.244044218853604000E-18 -0.920838732195320949E-17 -0.554290201001359983E-20 -0.815213370858219041E-19 -0.323977921931644982E-19 0.244566550410973989E-17 0.922736264539544005E-17 -0.225107239567582988E-18 0.146285264029161001E-20 -0.131179966443482995E-19 0.227861500072650001E-18 0.104859413987007006E-16 -0.325575790958343987E-21 -0.128914520652134002E-20 0.712380445010489047E-17 -0.689612758111431019E-19 -0.518476950651943026E-19 -0.987352703162770031E-18 -0.651557083079744989E-17 0.618057019281626989E-17 -0.256050918324738993E-22 -0.579315073000275023E-20 0.116825754961274992E-20 0.488482179479582027E-17 -0.112839648754171999E-20 -0.171478222420645004E-17 0.121003483029333003E-16 0.281361349456850020E-19 0.196479900706991004E-18 0.446695067007620992E-17 0.296023125814352014E-19 -0.525838715399959994E-19 0.134402804013066001E-16 -0.533630756770208973E-19 -0.135525271560687994E-19 -0.307388256558586017E-17 0.418095462764723033E-17 0.325938278103455002E-17 -0.804756263134598058E-22 0.212102343948397000E-18 0.553747789267498967E-19 0.178660424399612991E-17 -0.110855436971906990E-18 -0.115760817367797994E-16 -0.210618278531899992E-19 -0.934068891307130031E-19 0.132841484063259997E-18 0.281252988796605007E-18 -0.193943299659809005E-18 -0.345377684242941011E-18 -0.582891016608967021E-18 -0.584484497340988962E-17 -0.130601892554772000E-18 -0.345854140275771989E-18 0.152100316675026008E-18 -0.102914503091396998E-18 0.114150924532318007E-21 -0.415920887652381993E-18 -0.147870942203041992E-18 -0.441264460850088028E-18 -0.741908217566160965E-18 0.214934610365779012E-19 0.377141419764976985E-18 0.129262521706926995E-17 
-0.169406589450860008E-19 0.576660030490727979E-17 0.296461531539004998E-19 0.508219768352580011E-18 0.572594272343907029E-18 0.357553782859722012E-18 -0.169662498452868990E-21 -0.162630325872826002E-18 -0.745388993583784058E-19 -0.197866896478604983E-17 0.162630325872826002E-18 -0.582335151237331002E-19 0.208708918203459989E-17 -0.508219768352580023E-19 0.100691041604855001E-18 -0.138156367653096994E-17 0.225522522206456984E-18 0.317637355220363012E-18 0.468614360623540998E-19 -0.230392961653169981E-18 -0.332036915323686022E-18 -0.677626357803439970E-20 0.210729224131558998E-18 0.668009886874144024E-17 -0.181651509494605009E-17 -0.511607900141597020E-17 0.677626357803439970E-20 0.169406589450859991E-17 -0.870194004405784926E-21 -0.119153712877039012E-18 -0.291379333855478980E-18 0.481961746987696989E-18 0.290320542671411008E-18 0.758687410855676968E-17 -0.385399991000707015E-19 -0.444692297308507979E-20 0.647980204649540000E-19 0.108234928791338999E-18 0.201157090083095001E-18 -0.197605409177339990E-18 0.198550747370517010E-19 -0.101671722281198999E-18 0.310643151866217981E-18 -0.125183016637982994E-16 -0.145837243138382015E-19 0.171651875903994001E-19 -0.615441504747079020E-17 0.123241691163063998E-18 -0.110059088224217001E-18 -0.109498990014289995E-17 -0.483878427864559008E-19 0.347403720981722015E-18 -0.340587449003903991E-18 0.741306893449987060E-17 -0.363633250169256998E-19 0.364955640550089989E-18 0.328622055261124998E-20 0.494646450528802992E-17 0.588947426258927007E-19 0.299974477127637992E-18 0.552209517671485973E-18 0.121661835129705993E-16 -0.100268920998511004E-18 0.115831755537026005E-18 -0.310225816931887018E-18 0.122421671866663997E-16 0.579370535921941047E-18 -0.727770708280895005E-17 -0.135525271560687994E-19 0.698851812321550961E-19 0.225434841061526989E-18 -0.346550460915418993E-17 0.144465439621161009E-18 -0.453050130217742988E-18 0.304348202371331014E-17 0.166441974135469993E-18 -0.925383494875323000E-19 -0.872359232377204073E-17 
-0.423516473627150019E-20 0.172794721239877004E-18 -0.105032085459533001E-18 -0.633919457725117989E-17 0.457397791517322003E-19 0.704731412115577978E-18 -0.674238226014422974E-18 -0.404116110412576017E-17 -0.362753453114926982E-18 -0.475570224499606972E-17 -0.176475368004182995E-18 0.178340140066433007E-19 -0.163225895913863989E-18 0.482835275563946972E-19 0.399079748874743998E-20 0.367605681663466019E-18 0.163914110183507994E-19 -0.579031722743040020E-17 -0.228529489169210000E-17 0.277911509994136018E-17 0.156800809279949005E-22 -0.101942532784422997E-16 -0.200111533788828004E-19 0.681861522539712014E-19 -0.438868945796134002E-19 -0.998186762513412040E-19 -0.723376683507983022E-19 0.104239077746036003E-16 -0.222340978775424991E-19 -0.146386980184122005E-18 0.409358532980754034E-17 0.223196490323958014E-18 0.222899946074357995E-18 0.351381030256605971E-17 -0.227693044133796995E-17 0.248805340344110007E-17 0.166759611490690003E-20 0.826704156520197035E-18 0.189735380184963011E-18 -0.925912890467357058E-19 0.229545928705915007E-18 0.121727104849915995E-16 -0.743059652978834972E-17 -0.681480357713447030E-17 0.102237042169716995E-18 0.239339747158542999E-18 0.388946941467333989E-18 0.338813178901719991E-18 -0.921571846612678961E-18 0.655264687995926973E-17 0.770799982001412947E-19 0.954817889792410057E-18 0.202589700059119992E-19 -0.249620609555842002E-17 -0.114772964352958009E-18 -0.135525271560688006E-18 0.338813178901720015E-19 -0.451045044412914991E-19 0.101643953670516005E-18 0.627651413915437033E-18 0.282061971435682013E-18 0.234119906621088999E-17 0.834327453045485954E-19 0.216840434497100983E-18 0.237169225231203999E-18 -0.646794358523383970E-17 -0.677626357803440031E-19 0.243521972335611010E-18 -0.633315946750199974E-18 0.176182853028893989E-18 0.211758236813574988E-18 -0.549982711273340999E-17 -0.421382929258131981E-19 -0.755877126763073964E-19 -0.927923781985524985E-20 0.544312695782081001E-18 0.417435145161237000E-19 -0.461341442457651995E-17 
0.536108318836156013E-19 0.197084421782326988E-18 -0.190691339550880991E-18 -0.193154897758039006E-18 -0.127537878015355010E-18 0.298736053454965990E-18 0.127528573266872008E-18 0.609637342641941969E-17 0.396854887229514001E-20 -0.817053422952606007E-17 -0.752305314909567013E-21 -0.193634152214976995E-19 0.401511567862913983E-22 0.140692652691434998E-18 0.125895048470979998E-19 -0.964089261394241035E-19 0.637270598203130045E-19 0.357904713618655020E-17 -0.192415561086064008E-18 -0.335339833979068015E-18 -0.144439750170145000E-20 0.293628202065289017E-18 -0.755801452661721952E-19 0.339411342868876015E-17 -0.400752487403498007E-20 0.763312882702686022E-19 0.247337834049500987E-18 0.791484376388382995E-19 0.117214281504105993E-18 0.710182978617857932E-17 0.342936365526693036E-20 0.555760486582942945E-19 -0.781423606007173046E-21 0.298447611490521017E-17 -0.609211529548263984E-17 0.371005047806836004E-19 0.187569128895034009E-18 -0.189293009203637990E-18 -0.886637171818198977E-19 0.294046943769591991E-17 0.259566158502791993E-21 0.291867570968205015E-20 0.719829617311043049E-17 0.113028268052886998E-19 0.773855520595252012E-19 -0.378454053841608976E-17 -0.480794137786766970E-17 -0.101370917403036996E-16 0.115757992860620994E-21 -0.592809693679449974E-20 0.167889785464364989E-18 0.611388863593793994E-17 0.327757493780598990E-21 0.452478537105359005E-17 0.863084305453742042E-17 0.186191808557267986E-17 0.626132329032225955E-19 0.646801472276652056E-19 0.138210713419341001E-18 0.227784990428761008E-19 -0.145303063388947988E-19 0.114819286467261003E-18 -0.105892353296588006E-18 -0.137844024253797010E-17 0.171947688292622993E-18 0.230392961653169981E-18 -0.409752188234268030E-19 -0.101232067356301001E-16 0.159844381569747008E-19 -0.165171424714588999E-18 0.324837135272023987E-18 0.247031279622018984E-18 0.105941867811068004E-18 0.165709810725904997E-17 0.250228339151884989E-19 0.121222449583768003E-18 0.220343942471932981E-17 -0.175540960873553008E-18 
-0.443011466303800022E-18 -0.485540461189846004E-17 -0.599881968135295963E-17 -0.920317179059558028E-17 -0.319285512590875016E-19 -0.109833041734791000E-19 -0.160489582447537996E-18 0.197370257238828010E-18 0.281536711746710985E-18 0.502518425980557979E-18 0.685177358743191994E-17 0.732594660177182060E-17 -0.814276594997199962E-19 -0.277826806699410993E-18 -0.214934610365779000E-18 0.267620059684996000E-17 -0.444713473132189036E-17 -0.670701863459635981E-17 0.264697796016969006E-19 0.585130359963270982E-17 -0.372694496791892029E-19 0.801293168102568024E-18 0.410641572828885035E-17 0.502798757490152989E-17 0.178988649666674003E-18 0.343835745465758998E-14 -0.367193526019243979E-14 0.124117838402881993E-13 -0.668735222362701016E-14 0.823316024731179930E-17 0.113502414932076005E-18 0.248604170019137005E-18 -0.680961550033253983E-18 -0.504233419544565038E-17 -0.217204263911860001E-19 -0.210064170919066003E-18 -0.277826806699410993E-18 0.184997289636259991E-18 0.124672661923991994E-18 0.142419093985962009E-17 0.318871176989522014E-22 0.362796009892000020E-20 0.475428702862992037E-20 0.114993378999595012E-18 0.296998248186471012E-18 0.137377009476437004E-17 0.107906108735239008E-20 0.344697057730012976E-18 0.233888414833785013E-18 -0.114499755441505997E-18 0.343701109734176990E-18 0.239648173231097987E-18 0.662407103109341977E-18 -0.377078204440514013E-17 -0.211556892294861001E-20 -0.821331087183475025E-17 0.141328819223921000E-20 -0.122986833084708995E-19 -0.578785195122303022E-22 0.117628290555894005E-18 -0.482852030302985980E-19 0.152715824767208010E-18 0.191404425117076001E-18 -0.265067067422130010E-17 -0.664820294833819956E-19 0.731756791723990014E-19 0.711274425411596942E-19 -0.103554248939230999E-18 0.528365257801646029E-18 0.607076619005779013E-17 -0.156159844440068998E-18 0.131687040427342994E-19 -0.227879910615741999E-18 0.316422725621148005E-18 0.120637954277358989E-18 0.119435717232415004E-16 0.515901036912107984E-20 0.767837756490412976E-19 
-0.195591700332982007E-18 0.102080243701931009E-17 -0.582123393000517985E-17 -0.113091510730573003E-19 -0.737094946711110960E-24 0.812485213421242952E-19 0.293564361805019011E-18 -0.257901768391306992E-17 0.755454438035129986E-21 -0.102984835529547994E-20 -0.299206409188393008E-17 0.118414324366844011E-18 0.106238035287570000E-18 0.865029155483104026E-17 -0.605451096465977991E-17 -0.908673286122591033E-17 -0.245986784410216017E-21 -0.231756677129184016E-18 -0.175645901796425989E-18 0.582748079799118032E-17 0.756879467374077992E-22 0.697679309046806012E-17 0.378114738427846991E-17 0.185184588830498988E-18 0.240027296595149010E-19 0.612515214688381968E-17 0.116178223514405005E-20 0.107214617423943998E-19 -0.748335452127508986E-17 0.186841398172223001E-18 -0.219407572781849989E-18 -0.827097614197910050E-17 0.295813021938763978E-18 0.167050779066308997E-18 0.000000000000000000E+00 -0.185963535048316991E-18 -0.266565349779309023E-18 0.395342870760552980E-17 0.710830601573883961E-22 0.714369058868556026E-17 0.340525277702111998E-22 -0.382282094159552979E-20 0.149830506514481011E-21 0.883994619572126023E-19 -0.758302221231688993E-19 0.109190432258762008E-18 -0.703507417453546983E-20 -0.569691861010580966E-17 -0.419320786566922027E-19 0.150383337888478994E-18 0.240575431966751987E-21 0.280064166101323972E-19 0.105607927978014003E-18 0.524266083943383965E-17 0.504615464422202999E-23 0.269899549811437999E-18 -0.169841572392483002E-18 0.110246530172922989E-18 0.595056711102719968E-19 0.830106334405030930E-17 0.722278172990987047E-22 -0.260802680060638985E-19 -0.210156414482063007E-19 -0.129291638464487995E-17 0.652706764278169958E-22 0.409368376430043030E-17 0.000000000000000000E+00 0.231210476845850008E-18 0.729891645869777022E-19 0.462908799630395012E-17 -0.770274702091042971E-23 -0.714684049245816005E-21 0.462959092211639007E-17 -0.138590428094594993E-19 -0.208516916658589005E-19 -0.129291638464487995E-17 -0.149795744801019001E-21 -0.553218393675464984E-20 
-0.532042569994107024E-20 0.409368376430043030E-17 0.000000000000000000E+00 -0.494984878551731986E-20 -0.489690922631392007E-20 -0.739443137510889016E-17 0.111000775673736002E-16 0.222679088850805999E-18 0.106845003637134005E-18 -0.490050851211016962E-17 0.293770518732017028E-19 -0.802142049281503955E-19 -0.297911234386516011E-17 0.270478431405522001E-18 0.169875903679790995E-18 -0.867533249122434064E-17 -0.585861028395123963E-17 0.499436149037106027E-17 -0.308114995328776013E-22 -0.130599392574966007E-14 0.130515766480161008E-14 0.769254928189150031E-19 -0.103944874968919999E-18 -0.878111951636581074E-17 -0.350432956441832991E-22 0.633450637144740049E-19 -0.820001218346395020E-19 -0.398999557912142013E-21 0.148399850494123999E-19 0.909624493601297957E-17 0.302645293357789021E-21 -0.219210703865852003E-18 0.308119015134121004E-18 -0.275701318827977019E-18 -0.156380108301380001E-18 -0.956776784104850991E-17 -0.220370776999360005E-18 0.316081995140841987E-18 0.198833102388086990E-19 0.717056862337457936E-17 0.855578145811417952E-19 -0.993211691437939944E-19 0.833220302850867983E-21 -0.217506104734504998E-19 -0.778452855386439975E-19 0.409724525050444025E-17 -0.504842952642287961E-21 -0.346727880643630006E-18 -0.411653532459296998E-18 0.396489180937604980E-17 -0.136437880709918003E-23 -0.124957018075439001E-16 0.302207630763774979E-21 0.274303192419421975E-19 0.735547307649483054E-19 0.845852113709716048E-22 -0.134921032453032995E-18 -0.229587702135863011E-18 -0.161215647846060990E-19 -0.410673872542583016E-17 0.980567686783852952E-19 -0.139877417969035991E-18 -0.665224868865051959E-21 0.287046018190561014E-19 -0.154866630141438997E-19 0.223870818551690020E-18 0.124778581831986997E-18 0.153250017406549997E-17 -0.448632604218867018E-19 0.711507675693612044E-19 -0.677626357803439970E-20 0.107945878798088004E-16 -0.300431998479260013E-19 0.433680868994201966E-18 0.118313562072480995E-16 -0.637646402693036987E-17 0.271050543121375988E-19 -0.101643953670516005E-18 
0.677626357803439970E-20 0.813151629364127964E-19 0.279520872593919016E-18 -0.745388993583784058E-19 -0.100849860282464999E-19 -0.592051421507532003E-19 0.780283607724334012E-19 -0.664858447910062962E-19 0.437413849068639009E-19 -0.516544150489818993E-17 0.503227866641761002E-20 -0.301136200599181013E-19 0.266177796878770016E-17 0.102904964664959001E-18 0.312088382102112999E-18 -0.297903979430933015E-17 0.202876384986286988E-20 0.543951385875457012E-19 -0.239374042842221994E-18 0.418443806615245012E-17 -0.562255471114576966E-19 -0.295243281097027992E-18 -0.799691946769469941E-19 -0.852375263513670959E-17 0.140981303182299992E-19 -0.492761908203678022E-18 0.254715677652711988E-18 -0.117430723578436001E-16 -0.309172195626648020E-19 0.796210970419041958E-19 0.158818677610181001E-18 0.208899500616592014E-17 -0.443506451182351966E-17 -0.105032085459533001E-18 0.115196480826584991E-18 0.152282296409787000E-18 -0.133506123685445993E-18 0.772670733674762928E-17 0.347283508374263019E-19 -0.532836663382158006E-19 0.582929066917145019E-17 0.359989002583077981E-18 -0.155854062294790998E-18 -0.347495266611077003E-18 0.220906192643921991E-17 0.498055372985528985E-17 -0.167957369017666995E-18 0.338813178901720015E-19 -0.116890546721092989E-18 -0.293751026107791011E-17 0.733609941661028944E-19 0.100944390482868002E-16 -0.267894332096603011E-19 0.216840434497100983E-18 0.135525271560687994E-19 0.151788304147971006E-17 -0.247227741479849002E-19 -0.138913403349704991E-18 0.379470760369926985E-18 0.643745039913267969E-18 0.203287907341031991E-19 0.304931861011548002E-19 -0.182959116606928994E-18 0.347283508374262995E-18 0.196776341559015009E-18 -0.427751638363421972E-19 -0.163649412387490995E-19 0.690331852012255017E-19 -0.531407295283666964E-18 -0.422299341979263986E-18 -0.157579974642883004E-19 0.192321973960417003E-18 0.223317258693390997E-18 0.423311746425542994E-19 0.683894318895060959E-19 0.540270370111050010E-17 0.298115892764110993E-19 -0.196617522881404009E-18 
-0.811034046995993056E-19 0.129860738725924993E-18 0.422987078035115991E-19 -0.456513700878625969E-17 -0.224993126614424009E-19 -0.459581548334461984E-18 -0.133004025053625993E-18 0.735806933367970026E-18 0.405093507024368993E-18 0.194632289411277007E-18 -0.352809074866117003E-19 -0.232722302258119018E-18 -0.601393392550553000E-19 -0.627863172152250006E-19 -0.931736241979730072E-20 -0.765971894202063951E-17 -0.399121924746226012E-17 0.190077832958559989E-18 0.179066508041831014E-19 -0.574288338238416015E-17 0.880914265144471992E-19 0.182959116606928994E-18 0.153140909885616995E-18 0.203287907341031991E-19 0.325260651745650993E-18 0.379470760369926985E-18 -0.207523072077303992E-18 -0.361852475067037028E-17 0.301543729222530998E-17 0.394039727062700980E-17 0.350989277518501004E-19 0.393023287525995020E-18 0.272744609015884998E-18 -0.357447903741314974E-17 0.155423101195651015E-19 -0.123801276779504007E-16 0.770270586409379010E-19 -0.443845264361252992E-18 -0.250721752387272997E-18 -0.393700913883799029E-17 -0.601128694754536031E-19 0.103676832743925996E-17 -0.806375365786094077E-17 0.270372916763573015E-17 0.271050543121375988E-19 -0.168051336735252995E-17 -0.514996031930614991E-18 -0.155854062294790998E-18 0.172794721239877004E-18 -0.387517573368842020E-18 -0.207523072077304004E-19 0.390799826039453012E-18 -0.925342135844694988E-19 0.106281541774795000E-18 0.967048582329619004E-19 -0.834571884916494946E-17 -0.954318272702428028E-19 0.207248448113936002E-18 0.517443990235404036E-17 -0.244696568805436984E-18 -0.116783013241462011E-18 -0.578651881405754976E-17 -0.731624708190902038E-19 -0.335583865790313019E-18 -0.197623374506268996E-18 -0.163191485200382005E-17 0.101432195433701996E-18 0.148336644887909010E-18 -0.173324116831910990E-18 0.539199998398406007E-17 -0.169406589450860008E-19 -0.844915364886164949E-19 -0.416303458685688009E-19 0.233338717250593997E-17 0.273292202581394994E-19 -0.728448334638698051E-19 -0.525160427297666030E-19 -0.110323923797504006E-16 
-0.193123511973980015E-17 -0.372694496791892029E-19 0.115196480826584991E-18 0.242873462735370015E-18 0.910537153843644961E-20 -0.105840274005222006E-16 -0.469176843440076996E-20 0.193136746863780989E-18 -0.343708102894564009E-17 -0.209217137971811991E-18 0.414199111207353010E-18 0.394293836946877000E-18 0.304931861011548002E-19 -0.304931861011547978E-18 0.611915129942227994E-19 0.237169225231204005E-19 -0.299849663328021983E-18 0.207692478666753981E-17 -0.319490239792480988E-19 -0.133513568310958995E-17 -0.374812079160028020E-19 -0.406575814682063982E-19 0.406575814682063982E-19 -0.420128341838133016E-18 0.463088794131687014E-19 -0.138913403349704991E-18 0.127393755267047002E-17 -0.117906986257798992E-17 0.169406589450859993E-20 -0.332036915323686022E-18 -0.227004829864152009E-18 -0.559041745187837983E-19 -0.709390093325477015E-19 -0.947618109740748986E-19 0.242489650931144976E-18 0.359989002583078005E-19 -0.180312138646758992E-18 -0.229976452627177011E-17 -0.157796657189281015E-19 0.349872583691553995E-19 0.723435620126626944E-19 -0.457635088955547972E-19 -0.933145550948367955E-19 -0.123819011531837992E-17 -0.782909906169189995E-19 -0.184229666027809993E-19 -0.157336369952485999E-18 0.346171777630992014E-18 0.105217373916745000E-19 -0.111834818817168995E-17 -0.155642304057977988E-19 -0.955956090315283023E-19 -0.117479499317231010E-18 -0.362238933849222033E-17 0.115699406639016991E-18 -0.613040095575299991E-19 0.294757539477146002E-19 0.720825038113409974E-18 0.197676314065472005E-18 0.436963121664811995E-18 0.347865843525499989E-18 -0.652215369385810972E-18 0.381842452622238978E-17 0.131349663828520009E-18 -0.153847115333583998E-18 0.736833960816516019E-17 -0.262580213648833015E-19 -0.284603070277445010E-18 -0.197663079175671012E-19 -0.101643953670516005E-18 -0.298155597433514008E-18 0.840256683676266033E-18 -0.957147230397359022E-19 -0.220906192643921991E-17 0.447233396150270964E-17 0.315435069557500984E-17 0.371635705607823973E-19 0.216840434497100983E-18 
0.643745039913267969E-18 0.358125530099118029E-17 0.155665051524822986E-19 -0.103592129449201009E-17 0.619922238271740940E-19 -0.238439774652085979E-18 -0.407277263841508986E-18 -0.147777239183251985E-16 -0.557205404227970984E-19 0.336503690631471986E-18 -0.140650350287169006E-16 0.180418017765165996E-18 0.145689666927739995E-18 0.814210420548196008E-18 -0.687790753170491972E-18 0.718283939271646976E-18 -0.423516473627150019E-20 -0.565295230618739017E-18 -0.740359735459462010E-19 -0.399799551104030001E-18 0.152465930505773989E-18 -0.679733946795801001E-17 -0.208960686132527017E-20 -0.487083649340625048E-18 -0.251436436436519005E-18 -0.289066536862780991E-19 0.571135125743363007E-19 -0.514784273693801031E-18 0.586040920381569047E-19 -0.225310763969643986E-18 -0.897854924089557998E-19 -0.374812079160027996E-18 -0.124990299279213007E-18 -0.103052145945326007E-17 0.125149117956822995E-18 -0.194764638309285997E-18 0.287673564711241993E-18 -0.105034203041900993E-16 -0.128113733272213002E-19 0.546336250979023959E-19 -0.391223342513080029E-19 -0.282909004382935976E-18 -0.487890977618476995E-18 0.135525271560688006E-18 0.948676900924816019E-19 -0.650521303491303046E-18 -0.664073830647370984E-18 -0.546505657568474964E-17 -0.508219768352579978E-20 -0.460785923306338999E-17 -0.338813178901720015E-19 0.482469966756049011E-17 0.372694496791892029E-19 0.184653182501436992E-18 0.528548559086683026E-18 0.178529134292789006E-16 -0.885043550762337067E-17 0.203626720519933994E-17 0.109267250195805009E-18 -0.243521972335611010E-18 -0.253712837482265010E-19 -0.291379333855478980E-18 -0.145689666927739995E-18 -0.109097843606353999E-17 0.118584612615601999E-18 0.419895407777638036E-17 -0.436221967835964976E-19 0.166770584104119993E-18 0.301564784753128000E-18 0.105479300460810000E-16 0.153246012714406002E-21 -0.721442795365178040E-20 0.785191282370076000E-17 0.611660841639171959E-19 -0.269408879070079001E-19 0.980216235171613067E-20 -0.642700149301778963E-17 -0.852508928293581066E-17 
0.697544054984669996E-24 0.250693589472354980E-18 0.114838828609231990E-18 0.137684826882562005E-19 -0.881975394890917019E-19 0.988901378360807009E-17 -0.584661507095537965E-20 -0.378455001863782989E-18 0.568235859210977989E-20 -0.215342517844311983E-20 -0.145015040856607996E-18 -0.469036545974539019E-17 -0.514100323766396972E-20 -0.305076832491833999E-18 -0.196493615426171004E-18 0.205358492279442992E-18 0.117772935177185998E-18 0.205010377842294006E-17 0.202435071204792989E-19 -0.619429451884161010E-19 0.515229817082411962E-18 0.798642516943490968E-17 -0.375603283878283021E-18 -0.257011107083719992E-18 -0.135643988942254000E-20 -0.402490229080583990E-19 -0.873766277585149009E-19 0.487430530707361009E-17 0.786805309075646029E-21 0.927211481210010019E-19 -0.946732143119615016E-19 -0.966803394206032053E-17 0.210728270163397999E-24 -0.182379220144080012E-17 0.301144665518455994E-21 0.598805976239300004E-20 0.413513361367576011E-19 0.379595377543990018E-21 -0.148122463346125989E-18 -0.690407715361336988E-19 -0.320930310026529998E-19 -0.913936660975153941E-17 0.247663640323067001E-18 0.222959731044695013E-18 -0.869297579538561051E-22 0.634741170484694975E-20 -0.391093459440610985E-21 -0.235888633395294979E-18 -0.133873622544535990E-18 -0.597176764414821015E-17 -0.972390876617005020E-20 SuperLU_DIST_5.3.0/EXAMPLE/pddrive.c0000644013363400111340000001526213233431301015536 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for PDGSSVX example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * November 1, 2007
 * April 5, 2015
 * 
*/ #include #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PDDRIVE.
 *
 * This example illustrates how to use PDGSSVX with the full
 * (default) options to solve a linear system.
 * 
 * Five basic steps are required:
 *   1. Initialize the MPI environment and the SuperLU process grid
 *   2. Set up the input matrix and the right-hand side
 *   3. Set the options argument
 *   4. Call pdgssvx
 *   5. Release the process grid and terminate the MPI environment
 *
 * With MPICH, the program may be run by typing:
 *    mpiexec -n <np> pddrive -r <proc rows> -c <proc columns> big.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid; double *berr; double *b, *xtrue; int m, n; int nprow, npcol; int iam, info, ldb, ldx, nrhs; char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %4d)\n", nprow); printf("\t-c : process columns (default %4d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; if ( !iam ) { int v_major, v_minor, v_bugfix; superlu_dist_GetVersionNumber(&v_major, &v_minor, &v_bugfix); printf("Library version:\t%d.%d.%d\n", v_major, v_minor, v_bugfix); printf("Input matrix file:\t%s\n", *cpp); printf("Process grid:\t\t%d X %d\n", (int)grid.nprow, (int)grid.npcol); fflush(stdout); } #if ( VAMPIR>=1 ) VT_traceoff(); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. 
------------------------------------------------------------*/ dcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ParSymbFact = NO; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.IterRefine = DOUBLE; options.Trans = NOTRANS; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); #if 0 options.ColPerm = PARMETIS; options.ParSymbFact = YES; options.RowPerm = NOROWPERM; options.IterRefine = NOREFINE; options.Equil = NO; #endif if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } m = A.nrow; n = A.ncol; /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pdgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pdinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. */ /* ------------------------------------------------------------ DEALLOCATE STORAGE. 
------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); ScalePermstructFree(&ScalePermstruct); Destroy_LU(n, &grid, &LUstruct); LUstructFree(&LUstruct); if ( options.SolveInitialized ) { dSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pddrive1_ABglobal.c0000644013363400111340000002151713233431301017342 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for pdgssvx_ABglobal example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * April 5, 2015
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pddrive1_ABglobal.
 *
 * This example illustrates how to use pdgssvx_ABglobal to
 * solve systems with the same A but different right-hand side.
 * In this case, we factorize A only once in the first call to
 * pdgssvx_ABglobal, and reuse the following data structures
 * in the subsequent call to pdgssvx_ABglobal:
 *        ScalePermstruct  : DiagScale, R, C, perm_r, perm_c
 *        LUstruct         : Glu_persist, Llu
 * 
 * On an IBM SP, the program may be run by typing:
 *    poe pddrive1_ABglobal -r <proc rows> -c <proc columns> <input_file> -procs <p>

*

*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; double *a, *b, *b1, *xtrue; int_t *asub, *xa; int_t i, j, m, n, nnz; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp, *fopen(); extern int cpp_defs(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default " IFMT ")\n", nprow); printf("\t-c : process columns (default " IFMT ")\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ PROCESS 0 READS THE MATRIX A, AND THEN BROADCASTS IT TO ALL THE OTHER PROCESSES. ------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. 
*/ cpp_defs(); /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("Input matrix file: %s\n", *cpp); printf("\tDimension\t" IFMT "x" IFMT "\t # nonzeros " IFMT "\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); /* Allocate storage for compressed column representation. */ dallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } /* Create compressed column matrix for A. */ dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if ( !(b = doubleMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b[]"); if ( !(b1 = doubleMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]"); if ( !(xtrue = doubleMalloc_dist(n*nrhs)) ) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; dGenXtrue_dist(n, nrhs, xtrue, ldx); dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. 
------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid); } PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER SYSTEM WITH THE SAME A BUT DIFFERENT RIGHT-HAND SIDE, WE WILL USE THE EXISTING L AND U FACTORS IN LUSTRUCT OBTAINED FROM A PREVIOUS FATORIZATION. ------------------------------------------------------------*/ options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ PStatInit(&stat); /* Initialize the statistics variables. */ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { printf("Solve the system with a different B.\n"); dinf_norm_error_dist(n, nrhs, b1, ldb, xtrue, ldx, &grid); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. 
------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); Destroy_LU(n, &grid, &LUstruct); ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); SUPERLU_FREE(b); SUPERLU_FREE(b1); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pzgsmv.c0000644013363400111340000003145413233431301015430 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * 
*/ #include #include "superlu_zdefs.h" void pzgsmv_init ( SuperMatrix *A, /* Matrix A permuted by columns (input/output). The type of A can be: Stype = NR_loc; Dtype = D; Mtype = GE. */ int_t *row_to_proc, /* Input. Mapping between rows and processes. */ gridinfo_t *grid, /* Input */ pzgsmv_comm_t *gsmv_comm /* Output. The data structure for communication. */ ) { NRformat_loc *Astore; int iam, p, procs; int *SendCounts, *RecvCounts; int_t i, j, k, l, m, m_loc, n, fst_row, jcol; int_t TotalIndSend, TotalValSend; int_t *colind, *rowptr; int_t *ind_tosend = NULL, *ind_torecv = NULL; int_t *ptr_ind_tosend, *ptr_ind_torecv; int_t *extern_start, *spa, *itemp; doublecomplex *nzval, *val_tosend = NULL, *val_torecv = NULL, t; MPI_Request *send_req, *recv_req; MPI_Status status; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(grid->iam, "Enter pzgsmv_init()"); #endif /* ------------------------------------------------------------ INITIALIZATION. ------------------------------------------------------------*/ iam = grid->iam; procs = grid->nprow * grid->npcol; Astore = (NRformat_loc *) A->Store; m = A->nrow; n = A->ncol; m_loc = Astore->m_loc; fst_row = Astore->fst_row; colind = Astore->colind; rowptr = Astore->rowptr; nzval = Astore->nzval; if ( !(SendCounts = SUPERLU_MALLOC(2*procs * sizeof(int))) ) ABORT("Malloc fails for SendCounts[]"); /*for (i = 0; i < 2*procs; ++i) SendCounts[i] = 0;*/ RecvCounts = SendCounts + procs; if ( !(ptr_ind_tosend = intMalloc_dist(2*(procs+1))) ) ABORT("Malloc fails for ptr_ind_tosend[]"); ptr_ind_torecv = ptr_ind_tosend + procs + 1; if ( !(extern_start = intMalloc_dist(m_loc)) ) ABORT("Malloc fails for extern_start[]"); for (i = 0; i < m_loc; ++i) extern_start[i] = rowptr[i]; /* ------------------------------------------------------------ COUNT THE NUMBER OF X ENTRIES TO BE SENT TO EACH PROCESS. THIS IS THE UNION OF THE COLUMN INDICES OF MY ROWS. SWAP TO THE BEGINNING THE PART OF A CORRESPONDING TO THE LOCAL PART OF X. 
THIS ACCOUNTS FOR THE FIRST PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ if ( !(spa = intCalloc_dist(n)) ) /* Aid in global to local translation */ ABORT("Malloc fails for spa[]"); for (p = 0; p < procs; ++p) SendCounts[p] = 0; for (i = 0; i < m_loc; ++i) { /* Loop through each row */ k = extern_start[i]; for (j = rowptr[i]; j < rowptr[i+1]; ++j) {/* Each nonzero in row i */ jcol = colind[j]; p = row_to_proc[jcol]; if ( p != iam ) { /* External */ if ( spa[jcol] == 0 ) { /* First time see this index */ ++SendCounts[p]; spa[jcol] = 1; } } else { /* Swap to beginning the part of A corresponding to the local part of X */ l = colind[k]; t = nzval[k]; colind[k] = jcol; nzval[k] = nzval[j]; colind[j] = l; nzval[j] = t; ++k; } } extern_start[i] = k; } /* ------------------------------------------------------------ LOAD THE X-INDICES TO BE SENT TO THE OTHER PROCESSES. THIS ACCOUNTS FOR THE SECOND PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ /* Build pointers to ind_tosend[]. */ ptr_ind_tosend[0] = 0; for (p = 0, TotalIndSend = 0; p < procs; ++p) { TotalIndSend += SendCounts[p]; /* Total to send. */ ptr_ind_tosend[p+1] = ptr_ind_tosend[p] + SendCounts[p]; } #if 0 ptr_ind_tosend[iam] = 0; /* Local part of X */ #endif if ( TotalIndSend ) { if ( !(ind_tosend = intMalloc_dist(TotalIndSend)) ) ABORT("Malloc fails for ind_tosend[]"); /* Exclude local part of X */ } /* Build SPA to aid global to local translation. 
*/ for (i = 0; i < n; ++i) spa[i] = EMPTY; for (i = 0; i < m_loc; ++i) { /* Loop through each row of A */ for (j = rowptr[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; if ( spa[jcol] == EMPTY ) { /* First time see this index */ p = row_to_proc[jcol]; if ( p == iam ) { /* Local */ /*assert(jcol>=fst_row);*/ spa[jcol] = jcol - fst_row; /* Relative position in local X */ } else { /* External */ ind_tosend[ptr_ind_tosend[p]] = jcol; /* Still global */ spa[jcol] = ptr_ind_tosend[p]; /* Position in ind_tosend[] */ ++ptr_ind_tosend[p]; } } } } /* ------------------------------------------------------------ TRANSFORM THE COLUMN INDICES OF MATRIX A INTO LOCAL INDICES. THIS ACCOUNTS FOR THE THIRD PASS OF ACCESSING MATRIX A. ------------------------------------------------------------*/ for (i = 0; i < m_loc; ++i) { for (j = rowptr[i]; j < rowptr[i+1]; ++j) { jcol = colind[j]; colind[j] = spa[jcol]; } } /* ------------------------------------------------------------ COMMUNICATE THE EXTERNAL INDICES OF X. ------------------------------------------------------------*/ MPI_Alltoall(SendCounts, 1, MPI_INT, RecvCounts, 1, MPI_INT, grid->comm); /* Build pointers to ind_torecv[]. */ ptr_ind_torecv[0] = 0; for (p = 0, TotalValSend = 0; p < procs; ++p) { TotalValSend += RecvCounts[p]; /* Total to receive. 
*/ ptr_ind_torecv[p+1] = ptr_ind_torecv[p] + RecvCounts[p]; } if ( TotalValSend ) { if ( !(ind_torecv = intMalloc_dist(TotalValSend)) ) ABORT("Malloc fails for ind_torecv[]"); } if ( !(send_req = (MPI_Request *) SUPERLU_MALLOC(2*procs *sizeof(MPI_Request)))) ABORT("Malloc fails for recv_req[]."); recv_req = send_req + procs; for (p = 0; p < procs; ++p) { ptr_ind_tosend[p] -= SendCounts[p]; /* Reset pointer to beginning */ if ( SendCounts[p] ) { MPI_Isend(&ind_tosend[ptr_ind_tosend[p]], SendCounts[p], mpi_int_t, p, iam, grid->comm, &send_req[p]); } if ( RecvCounts[p] ) { MPI_Irecv(&ind_torecv[ptr_ind_torecv[p]], RecvCounts[p], mpi_int_t, p, p, grid->comm, &recv_req[p]); } } for (p = 0; p < procs; ++p) { if ( SendCounts[p] ) MPI_Wait(&send_req[p], &status); if ( RecvCounts[p] ) MPI_Wait(&recv_req[p], &status); } /* Allocate storage for the X values to to transferred. */ if ( TotalIndSend && !(val_torecv = doublecomplexMalloc_dist(TotalIndSend)) ) ABORT("Malloc fails for val_torecv[]."); if ( TotalValSend && !(val_tosend = doublecomplexMalloc_dist(TotalValSend)) ) ABORT("Malloc fails for val_tosend[]."); gsmv_comm->extern_start = extern_start; gsmv_comm->ind_tosend = ind_tosend; gsmv_comm->ind_torecv = ind_torecv; gsmv_comm->ptr_ind_tosend = ptr_ind_tosend; gsmv_comm->ptr_ind_torecv = ptr_ind_torecv; gsmv_comm->SendCounts = SendCounts; gsmv_comm->RecvCounts = RecvCounts; gsmv_comm->val_tosend = val_tosend; gsmv_comm->val_torecv = val_torecv; gsmv_comm->TotalIndSend = TotalIndSend; gsmv_comm->TotalValSend = TotalValSend; SUPERLU_FREE(spa); SUPERLU_FREE(send_req); #if ( DEBUGlevel>=1 ) PrintInt10("pzgsmv_init::rowptr", m_loc+1, rowptr); PrintInt10("pzgsmv_init::extern_start", m_loc, extern_start); CHECK_MALLOC(iam, "Exit pzgsmv_init()"); #endif } /* PZGSMV_INIT */

/*
 * Performs sparse matrix-vector multiplication.
 *
 * Computes the locally owned rows of A*x, or of abs(A)*abs(x) when `abs`
 * is nonzero.  The entries of x owned by other processes are exchanged
 * first with non-blocking point-to-point messages, driven by the index
 * lists and counts stored in gsmv_comm.
 *
 * Result layout:
 *   abs == 0 : ax[0..m_loc-1] holds the complex products.
 *   abs != 0 : the m_loc real results are written as doubles at the start
 *              of the storage that ax points to (ax is accessed through a
 *              double pointer).
 *
 * Both the local and the external contributions to abs(A)*abs(x) are
 * accumulated with z_abs1(), so every term of a row sum uses the same
 * magnitude measure.
 */
void pzgsmv
(
 int_t  abs,               /* Input. Do abs(A)*abs(x). */
 SuperMatrix *A_internal,  /* Input. Matrix A permuted by columns.
                              The column indices are translated into
                              the relative positions in the gathered
                              x-vector.
                              The type of A can be:
                              Stype = NR_loc; Dtype = SLU_Z; Mtype = GE. */
 gridinfo_t *grid,         /* Input */
 pzgsmv_comm_t *gsmv_comm, /* Input. The data structure for communication. */
 doublecomplex x[],        /* Input. The distributed source vector */
 doublecomplex ax[]        /* Output. The distributed destination vector */
)
{
    NRformat_loc *Astore;
    int iam, procs;
    int_t i, j, p, m_loc, fst_row, jcol;
    int_t *colind, *rowptr;
    int   *SendCounts, *RecvCounts;
    int_t *ind_torecv, *ptr_ind_tosend, *ptr_ind_torecv;
    int_t *extern_start, TotalValSend;
    doublecomplex *nzval, *val_tosend, *val_torecv;
    doublecomplex zero = {0.0, 0.0}, temp;
    double *ax_abs = (double *) ax;   /* real-valued view used when abs != 0 */
    MPI_Request *send_req, *recv_req;
    MPI_Status status;

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(grid->iam, "Enter pzgsmv()");
#endif

    /* ------------------------------------------------------------
       INITIALIZATION.
       ------------------------------------------------------------*/
    iam = grid->iam;
    procs = grid->nprow * grid->npcol;
    Astore = (NRformat_loc *) A_internal->Store;
    m_loc = Astore->m_loc;
    fst_row = Astore->fst_row;
    colind = Astore->colind;
    rowptr = Astore->rowptr;
    nzval = (doublecomplex *) Astore->nzval;
    extern_start = gsmv_comm->extern_start;
    ind_torecv = gsmv_comm->ind_torecv;
    ptr_ind_tosend = gsmv_comm->ptr_ind_tosend;
    ptr_ind_torecv = gsmv_comm->ptr_ind_torecv;
    SendCounts = gsmv_comm->SendCounts;
    RecvCounts = gsmv_comm->RecvCounts;
    val_tosend = (doublecomplex *) gsmv_comm->val_tosend;
    val_torecv = (doublecomplex *) gsmv_comm->val_torecv;
    TotalValSend = gsmv_comm->TotalValSend;

    /* ------------------------------------------------------------
       COPY THE X VALUES INTO THE SEND BUFFER.
       ------------------------------------------------------------*/
    for (i = 0; i < TotalValSend; ++i) {
        j = ind_torecv[i] - fst_row; /* Relative index in x[] */
        val_tosend[i] = x[j];
    }

    /* ------------------------------------------------------------
       COMMUNICATE THE X VALUES.
       The roles of the count arrays are swapped with respect to the
       index exchange: values are sent to the processes whose index
       requests were received (RecvCounts), and received from the
       processes to which index requests were sent (SendCounts).
       ------------------------------------------------------------*/
    if ( !(send_req = (MPI_Request *)
           SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))))
        ABORT("Malloc fails for recv_req[].");
    recv_req = send_req + procs;  /* second half of the same allocation */
    for (p = 0; p < procs; ++p) {
        if ( RecvCounts[p] ) {
            MPI_Isend(&val_tosend[ptr_ind_torecv[p]], RecvCounts[p],
                      SuperLU_MPI_DOUBLE_COMPLEX, p, iam,
                      grid->comm, &send_req[p]);
        }
        if ( SendCounts[p] ) {
            MPI_Irecv(&val_torecv[ptr_ind_tosend[p]], SendCounts[p],
                      SuperLU_MPI_DOUBLE_COMPLEX, p, p,
                      grid->comm, &recv_req[p]);
        }
    }
    for (p = 0; p < procs; ++p) {
        if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status);
        if ( SendCounts[p] ) MPI_Wait(&recv_req[p], &status);
    }

    /* ------------------------------------------------------------
       PERFORM THE ACTUAL MULTIPLICATION.
       In each row, entries [rowptr[i], extern_start[i]) index the
       local x[]; entries [extern_start[i], rowptr[i+1]) index the
       received buffer val_torecv[].
       ------------------------------------------------------------*/
    if ( abs ) { /* Perform abs(A)*abs(x) */
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            ax_abs[i] = 0.0;
            /* Multiply the local part. */
            for (j = rowptr[i]; j < extern_start[i]; ++j) {
                jcol = colind[j];
                ax_abs[i] += z_abs1(&nzval[j]) * z_abs1(&x[jcol]);
            }
            /* Multiply the external part (same measure as the local part). */
            for (; j < rowptr[i+1]; ++j) {
                jcol = colind[j];
                ax_abs[i] += z_abs1(&nzval[j]) * z_abs1(&val_torecv[jcol]);
            }
        }
    } else {
        for (i = 0; i < m_loc; ++i) { /* Loop through each row */
            ax[i] = zero;
            /* Multiply the local part. */
            for (j = rowptr[i]; j < extern_start[i]; ++j) {
                jcol = colind[j];
                zz_mult(&temp, &nzval[j], &x[jcol]);
                z_add(&ax[i], &ax[i], &temp);
            }
            /* Multiply the external part. */
            for (; j < rowptr[i+1]; ++j) {
                jcol = colind[j];
                zz_mult(&temp, &nzval[j], &val_torecv[jcol]);
                z_add(&ax[i], &ax[i], &temp);
            }
        }
    }

    SUPERLU_FREE(send_req);
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Exit pzgsmv()");
#endif

} /* PZGSMV */

/*
 * Releases the storage referenced by a pzgsmv_comm_t.
 *
 * The index and value buffers are freed only when non-null.
 * NOTE(review): ptr_ind_torecv and RecvCounts are not freed here;
 * presumably they point into the allocations of ptr_ind_tosend and
 * SendCounts -- confirm against pzgsmv_init().
 */
void pzgsmv_finalize(pzgsmv_comm_t *gsmv_comm)
{
    SUPERLU_FREE(gsmv_comm->extern_start);
    if ( gsmv_comm->ind_tosend ) SUPERLU_FREE(gsmv_comm->ind_tosend);
    if ( gsmv_comm->ind_torecv ) SUPERLU_FREE(gsmv_comm->ind_torecv);
    SUPERLU_FREE(gsmv_comm->ptr_ind_tosend);
    SUPERLU_FREE(gsmv_comm->SendCounts);
    if ( gsmv_comm->val_tosend ) SUPERLU_FREE(gsmv_comm->val_tosend);
    if ( gsmv_comm->val_torecv ) SUPERLU_FREE(gsmv_comm->val_torecv);
}
SuperLU_DIST_5.3.0/EXAMPLE/zlook_ahead_update.c0000644013363400111340000001660113233431301017721 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /************************************************************************/ /*! @file * \brief Look-ahead update of the Schur complement. * *
 * -- Distributed SuperLU routine (version 4.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * August 15, 2014
 *
 */
/*
 * Look-ahead update (code fragment; every variable used here is expected to
 * be declared and initialized by the enclosing scope, which is not visible
 * in this file).
 *
 * For each U block j whose perm_u / iperm_u entry is within
 * k0 + num_look_aheads:
 *   1. gather the nonempty segments of U(k,j) into the dense buffer bigU,
 *      zero-padding every column to a common height ldu;
 *   2. for every L row block, multiply it by that buffer with zgemm into a
 *      per-thread workspace and scatter the product into U or L;
 *   3. if look_ahead[jb] == k0 and this process column owns block column jb,
 *      factor the panel with PZGSTRF2 and post non-blocking sends of the
 *      resulting L panel along the process row.
 */
#ifdef ISORT
while (j < nub && iperm_u[j] <= k0 + num_look_aheads)
#else
while (j < nub && perm_u[2 * j] <= k0 + num_look_aheads)
#endif
{
    doublecomplex zero = {0.0, 0.0};

    arrive_at_ublock
        (j, &iukp, &rukp, &jb, &ljb, &nsupc,
         iukp0, rukp0, usub, perm_u, xsup, grid);
    j++;
    jj0++;
    jj = iukp;
    lptr = lptr0;
    luptr = luptr0;

    /* Skip leading columns whose segment is empty (usub[jj] == klst). */
    while (usub[jj] == klst) ++jj;

    /* Scan the remaining columns: ldu ends up as the largest segment size,
       ncols counts the nonempty segments, and full stays 1 only if all
       nonempty segments have the same size. */
    ldu = klst - usub[jj++];
    ncols = 1;
    full = 1;
    for (; jj < iukp + nsupc; ++jj) {
        segsize = klst - usub[jj];
        if (segsize) {
            ++ncols;
            if (segsize != ldu)
                full = 0;
            if (segsize > ldu)
                ldu = segsize;
        }
    }
#if ( DEBUGlevel>=3 )
    ++num_update;
#endif
    /* The in-place branch is disabled by the constant condition; the block
       is always copied into bigU. */
    if (0) {
        tempu = &uval[rukp];
    }
    else  {                    /* Copy block U(k,j) into tempU2d. */
#if ( DEBUGlevel>=3 )
        printf ("(%d) full=%d,k=%d,jb=%d,ldu=%d,ncols=%d,nsupc=%d\n",
                iam, full, k, jb, ldu, ncols, nsupc);
        ++num_copy;
#endif
        tempu = bigU;
        for (jj = iukp; jj < iukp + nsupc; ++jj) {
            segsize = klst - usub[jj];
            if (segsize) {
                /* Pad the top of this column with zeros so that it has
                   exactly ldu entries in the dense buffer. */
                lead_zero = ldu - segsize;
                for (i = 0; i < lead_zero; ++i) tempu[i] = zero;
                tempu += lead_zero;
                for (i = 0; i < segsize; ++i) {
                    tempu[i] = uval[rukp + i];
                }
                rukp += segsize;
                tempu += segsize;
            }
        }
        tempu = bigU;
        rukp -= usub[iukp - 1]; /* Return to start of U(k,j). */
    }                           /* if full ... */

    nbrow = lsub[1];
    if (myrow == krow) nbrow = lsub[1] - lsub[3]; /* skip diagonal block for those rows */
// double ttx =SuperLU_timer_();
    
#pragma omp parallel for \
                    private(lb,lptr,luptr,ib,tempv ) \
                    default(shared) schedule(dynamic)
    for (lb = 0; lb < nlb; lb++) {
        
        /* Each iteration walks the descriptors from lptr0/luptr0 up to block
           lb, using loop-local copies of lptr and luptr, so it does not
           depend on the position reached by any other iteration. */
        int_t temp_nbrow;
        int_t lptr = lptr0;
        int_t luptr = luptr0;
        for (int i = 0; i < lb; ++i) {
            ib = lsub[lptr];    /* Row block L(i,k). */
            temp_nbrow = lsub[lptr + 1];    /* Number of full rows. */
            lptr += LB_DESCRIPTOR;  /* Skip descriptor. */
            lptr += temp_nbrow;
            luptr += temp_nbrow;
            
        }
        
        /* Per-thread slices of the shared work buffers, selected by the
           OpenMP thread number. */
        int_t thread_id = omp_get_thread_num ();
        doublecomplex * tempv = bigV + ldt*ldt*thread_id;

        int *indirect_thread = indirect + ldt * thread_id;
        int *indirect2_thread   = indirect2 + ldt*thread_id;        
        ib = lsub[lptr];        /* Row block L(i,k). */
        temp_nbrow = lsub[lptr + 1];    /* Number of full rows. */
        assert (temp_nbrow <= nbrow);

        lptr += LB_DESCRIPTOR;  /* Skip descriptor. */

        /* calling gemm */
        /* Multiplies a temp_nbrow-by-ldu slice of lusup (leading dimension
           nsupr, starting at column knsupc - ldu) by the ldu-by-ncols buffer
           tempu; the result goes to tempv with leading dimension temp_nbrow.
           alpha and beta come from the enclosing scope. */
#if defined (USE_VENDOR_BLAS)
        zgemm("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
                   &lusup[luptr + (knsupc - ldu) * nsupr], &nsupr,
                   tempu, &ldu, &beta, tempv, &temp_nbrow, 1, 1);
#else
        zgemm("N", "N", &temp_nbrow, &ncols, &ldu, &alpha,
                   &lusup[luptr + (knsupc - ldu) * nsupr], &nsupr,
                   tempu, &ldu, &beta, tempv, &temp_nbrow );
#endif

        /* Now scattering the output*/
        if (ib < jb) {    /* A(i,j) is in U. */
            zscatter_u (ib, jb,
                       nsupc, iukp, xsup,
                       klst, temp_nbrow,
                       lptr, temp_nbrow, lsub,
                       usub, tempv, Ufstnz_br_ptr, Unzval_br_ptr, grid);
        } else {          /* A(i,j) is in L. */
            zscatter_l (ib, ljb, nsupc, iukp, xsup, klst, temp_nbrow, lptr,
                       temp_nbrow, usub, lsub, tempv,
                       indirect_thread, indirect2_thread, 
                       Lrowind_bc_ptr, Lnzval_bc_ptr, grid);
        }

    } /* parallel for lb = 0, ... */

    rukp += usub[iukp - 1];     /* Move to block U(k,j+1) */
    iukp += nsupc;

    /* ==================================== *
     * == factorize and send if possible == *
     * ==================================== */
    kk = jb;
    kcol = PCOL (kk, grid);
    /* j was already incremented above, so j - 1 is the block just updated. */
#ifdef ISORT
    kk0 = iperm_u[j - 1];
#else
    kk0 = perm_u[2 * (j - 1)];
#endif
    /* Index of the send-request / message-count slot used for this panel. */
    look_id = kk0 % (1 + num_look_aheads);

    if (look_ahead[kk] == k0 && kcol == mycol) {
    /* current column is the last dependency */
        look_id = kk0 % (1 + num_look_aheads);

        /* Factor diagonal and subdiagonal blocks and test for exact
           singularity.  */
        factored[kk] = 0;
        /* double ttt1 = SuperLU_timer_(); */
#if ( VAMPIR>=1 )
        VT_begin (5);
#endif

        PZGSTRF2(options, nsupers, kk0, kk, thresh, Glu_persist, grid, Llu,
                  U_diag_blk_send_req, tag_ub, stat, info);

#if ( VAMPIR>=1 )
        VT_end (5);
#endif
        /* stat->time7 += SuperLU_timer_() - ttt1; */

        /* Process column *kcol+1* multicasts numeric values of L(:,k+1)
           to process rows. */
        send_req = send_reqs[look_id];
        msgcnt = msgcnts[look_id];

        lk = LBj (kk, grid);    /* Local block number. */
        lsub1 = Lrowind_bc_ptr[lk];
        lusup1 = Lnzval_bc_ptr[lk];
        /* msgcnt[0] is the number of index entries to send and msgcnt[1]
           the number of numerical values; both are 0 when this process
           holds no index array for the block column. */
        if (lsub1) {
            msgcnt[0] = lsub1[1] + BC_HEADER + lsub1[0] * LB_DESCRIPTOR;
            msgcnt[1] = lsub1[1] * SuperSize (kk);
        } else {
            msgcnt[0] = 0;
            msgcnt[1] = 0;
        }

        scp = &grid->rscp;      /* The scope of process row. */
        for (pj = 0; pj < Pc; ++pj) {
            if (ToSendR[lk][pj] != EMPTY) {
#if ( PROFlevel>=1 )
                TIC (t1);
#endif
#if ( VAMPIR>=1 )
                VT_begin (1);
#endif
                /* Two non-blocking sends per destination: the index array
                   (request slot pj) and the values (request slot pj + Pc). */
                MPI_Isend (lsub1, msgcnt[0], mpi_int_t, pj,
                           SLU_MPI_TAG (0, kk0) /* (4*kk0)%tag_ub */ ,
                           scp->comm, &send_req[pj]);
                MPI_Isend (lusup1, msgcnt[1], SuperLU_MPI_DOUBLE_COMPLEX, pj,
                           SLU_MPI_TAG (1, kk0) /* (4*kk0+1)%tag_ub */ ,
                           scp->comm, &send_req[pj + Pc]);
#if ( VAMPIR>=1 )
                VT_end (1);
#endif
#if ( PROFlevel>=1 )
                TOC (t2, t1);
                stat->utime[COMM] += t2;
                msg_cnt += 2;
                msg_vol += msgcnt[0] * iword + msgcnt[1] * dword;
#endif
#if ( DEBUGlevel>=2 )
                printf ("[%d] -2- Send L(:,%4d): #lsub %4d, #lusup %4d to Pj %2d\n",
                        iam, kk, msgcnt[0], msgcnt[1], pj);
		if (kk==3) {
		    PrintInt10("..send lsub", msgcnt[0], lsub1);
 		    PrintDoublecomplex("..send lusup", msgcnt[1], lusup1);
		}
#endif
            }   /*if ( ToSendR[lk][pj] != EMPTY ) */
        }       /* for pj ... */
    }           /*if( look_ahead[kk] == k0 && kcol == mycol ) */
}               /* while j < nub and perm_u[j] 
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * April 5, 2015
 * 
*/ #include #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PZDRIVE1.
 *
 * This example illustrates how to use PZGSSVX to
 * solve systems with the same A but different right-hand side.
 * In this case, we factorize A only once in the first call to
 * PZGSSVX, and reuse the following data structures
 * in the subsequent call to PZGSSVX:
 *        ScalePermstruct  : DiagScale, R, C, perm_r, perm_c
 *        LUstruct         : Glu_persist, Llu
 * 
 * With MPICH, the program may be run by typing:
 *    mpiexec -n <np> pzdrive1 -r <proc rows> -c <proc columns> big.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid; double *berr; doublecomplex *b, *xtrue, *b1; int i, j, m, n; int nprow, npcol; int iam, info, ldb, ldx, nrhs; char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. 
*/ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; if ( !iam ) { int v_major, v_minor, v_bugfix; superlu_dist_GetVersionNumber(&v_major, &v_minor, &v_bugfix); printf("Library version:\t%d.%d.%d\n", v_major, v_minor, v_bugfix); printf("Input matrix file:\t%s\n", *cpp); printf("Process grid:\t\t%d X %d\n", (int)grid.nprow, (int)grid.npcol); fflush(stdout); } #if ( VAMPIR>=1 ) VT_traceoff(); #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. ------------------------------------------------------------*/ zcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid); if ( !(b1 = doublecomplexMalloc_dist(ldb * nrhs)) ) ABORT("Malloc fails for b1[]"); for (j = 0; j < nrhs; ++j) for (i = 0; i < ldb; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } m = A.nrow; n = A.ncol; /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pzgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. 
*/ if ( !iam ) printf("\tSolve the first system:\n"); pzinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER SYSTEM WITH THE SAME A BUT DIFFERENT RIGHT-HAND SIDE, WE WILL USE THE EXISTING L AND U FACTORS IN LUSTRUCT OBTAINED FROM A PREVIOUS FATORIZATION. ------------------------------------------------------------*/ options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ PStatInit(&stat); /* Initialize the statistics variables. */ pzgssvx(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) printf("\tSolve the system with a different B:\n"); pzinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b1, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. */ /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); ScalePermstructFree(&ScalePermstruct); Destroy_LU(n, &grid, &LUstruct); LUstructFree(&LUstruct); if ( options.SolveInitialized ) { zSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b); SUPERLU_FREE(b1); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. 
CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pzdrive2.c0000644013363400111340000002135613233431301015647 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for PZGSSVX example * *
 * -- Distributed SuperLU routine (version 5.1.3) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * April 5, 2015
 * December 31, 2016 version 5.1.3
 * 
*/ #include #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PZDRIVE2.
 *
 * This example illustrates how to use PZGSSVX to solve
 * systems repeatedly with the same sparsity pattern of matrix A.
 * In this case, the column permutation vector ScalePermstruct->perm_c is
 * computed once. The following data structures will be reused in the
 * subsequent call to PZGSSVX:
 *        ScalePermstruct : perm_c
 *        LUstruct        : etree
 *
 * With MPICH, the program may be run by typing:
 *    mpiexec -n <np> pzdrive2 -r <proc rows> -c <proc columns> g20.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; NRformat_loc *Astore; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid; double *berr; doublecomplex *b, *b1, *xtrue, *xtrue1; int_t *colind, *colind1, *rowptr, *rowptr1; int_t i, j, m, n, nnz_loc, m_loc; int nprow, npcol; int iam, info, ldb, ldx, nrhs; char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); /* prototypes */ extern int zcreate_matrix_perturbed (SuperMatrix *, int, doublecomplex **, int *, doublecomplex **, int *, FILE *, gridinfo_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %4d)\n", nprow); printf("\t-c : process columns (default %4d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. 
*/ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; if ( !iam ) { int v_major, v_minor, v_bugfix; superlu_dist_GetVersionNumber(&v_major, &v_minor, &v_bugfix); printf("Library version:\t%d.%d.%d\n", v_major, v_minor, v_bugfix); printf("Input matrix file:\t%s\n", *cpp); printf("Process grid:\t\t%d X %d\n", (int)grid.nprow, (int)grid.npcol); fflush(stdout); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT-HAND SIDE. ------------------------------------------------------------*/ zcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); m = A.nrow; n = A.ncol; Astore = (NRformat_loc *) A.Store; m_loc = Astore->m_loc; /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pzgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pzinf_norm_error(iam, m_loc, nrhs, b, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. 
*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ SUPERLU_FREE(b); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue); /* Free storage of the exact solution. */ /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM. ONLY THE SPARSITY PATTERN OF MATRIX A IS THE SAME. ------------------------------------------------------------*/ options.Fact = SamePattern; if (iam==0) { print_options_dist(&options); #if ( PRNTlevel>=2 ) PrintInt10("perm_r", m, ScalePermstruct.perm_r); PrintInt10("perm_c", n, ScalePermstruct.perm_c); #endif } /* Get the matrix from file, perturbed some diagonal entries to force a different perm_r[]. Set up the right-hand side. */ if ( !(fp = fopen(*cpp, "r")) ) ABORT("File does not exist"); zcreate_matrix_perturbed(&A, nrhs, &b1, &ldb, &xtrue1, &ldx, fp, &grid); PStatInit(&stat); /* Initialize the statistics variables. */ /* Solve the linear system. */ pzgssvx(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) printf("Solve the system with the same sparsity pattern.\n"); pzinf_norm_error(iam, m_loc, nrhs, b1, ldb, xtrue1, ldx, &grid); #if ( PRNTlevel>=2 ) if (iam==0) { PrintInt10("new perm_r", m, ScalePermstruct.perm_r); PrintInt10("new perm_c", n, ScalePermstruct.perm_c); } #endif /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. 
*/ ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); /* Deallocate the structure of L and U.*/ if ( options.SolveInitialized ) { zSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b1); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue1); /* Free storage of the exact solution. */ SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pzdrive3.c0000644013363400111340000002172013233431301015643 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for PZGSSVX example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * April 5, 2015
 * 
*/ #include #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PZDRIVE3.
 *
 * This example illustrates how to use PZGSSVX to solve
 * systems repeatedly with the same sparsity pattern and similar
 * numerical values of matrix A.
 * In this case, the column permutation vector and symbolic factorization are
 * computed only once. The following data structures will be reused in the
 * subsequent call to PZGSSVX:
 *        ScalePermstruct : DiagScale, R, C, perm_r, perm_c
 *        LUstruct        : etree, Glu_persist, Llu
 *
 * NOTE:
 * The distributed nonzero structures of L and U remain the same,
 * although the numerical values are different. So 'Llu' is set up once
 * in the first call to PZGSSVX, and reused in the subsequent call.
 *
 * With MPICH, the program may be run by typing:
 *    mpiexec -n <np> pzdrive3 -r <proc rows> -c <proc columns> big.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; NRformat_loc *Astore; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid; double *berr; doublecomplex *b, *b1, *xtrue, *nzval, *nzval1; int_t *colind, *colind1, *rowptr, *rowptr1; int_t i, j, m, n, nnz_loc, m_loc, fst_row; int nprow, npcol; int iam, info, ldb, ldx, nrhs; char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. 
*/ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; if ( !iam ) { int v_major, v_minor, v_bugfix; superlu_dist_GetVersionNumber(&v_major, &v_minor, &v_bugfix); printf("Library version:\t%d.%d.%d\n", v_major, v_minor, v_bugfix); printf("Input matrix file:\t%s\n", *cpp); printf("Process grid:\t\t%d X %d\n", (int)grid.nprow, (int)grid.npcol); fflush(stdout); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. ------------------------------------------------------------*/ zcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid); if ( !(b1 = doublecomplexMalloc_dist(ldb * nrhs)) ) ABORT("Malloc fails for b1[]"); for (j = 0; j < nrhs; ++j) for (i = 0; i < ldb; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); m = A.nrow; n = A.ncol; /* Save a copy of the matrix A. */ Astore = (NRformat_loc *) A.Store; nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; fst_row = Astore->fst_row; nzval = Astore->nzval; colind = Astore->colind; rowptr = Astore->rowptr; nzval1 = doublecomplexMalloc_dist(nnz_loc); colind1 = intMalloc_dist(nnz_loc); rowptr1 = intMalloc_dist(m_loc+1); for (i = 0; i < nnz_loc; ++i) { nzval1[i] = nzval[i]; colind1[i] = colind[i]; } for (i = 0; i < m_loc+1; ++i) rowptr1[i] = rowptr[i]; /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. 
------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pzgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pzinf_norm_error(iam, m_loc, nrhs, b, ldb, xtrue, ldx, &grid); PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); /* Deallocate storage of matrix A. */ SUPERLU_FREE(b); /* Free storage of right-hand side. */ /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM. THE MATRIX A HAS THE SAME SPARSITY PATTERN AND THE SIMILAR NUMERICAL VALUES AS THAT IN A PREVIOUS SYSTEM. ------------------------------------------------------------*/ options.Fact = SamePattern_SameRowPerm; PStatInit(&stat); /* Initialize the statistics variables. */ /* Set up the local A in NR_loc format */ zCreate_CompRowLoc_Matrix_dist(&A, m, n, nnz_loc, m_loc, fst_row, nzval1, colind1, rowptr1, SLU_NR_loc, SLU_Z, SLU_GE); /* Solve the linear system. */ pzgssvx(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. 
*/ if ( !iam ) printf("Solve a system with the same pattern and similar values.\n"); pzinf_norm_error(iam, m_loc, nrhs, b1, ldb, xtrue, ldx, &grid); /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); /* Deallocate the structure of L and U.*/ if ( options.SolveInitialized ) { zSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b1); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue); /* Free storage of the exact solution. */ SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/EXAMPLE/pzdrive4.c0000644013363400111340000002155713233431301015654 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. 
The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief This example illustrates how to divide up the processes into subgroups * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 * April 5, 2015
 * 
*/ #include #include "superlu_zdefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program PZDRIVE4.
 *
 * This example illustrates how to divide up the processes into
 * subgroups (multiple grids) such that each subgroup solves a linear
 * system independently from the other.
 *
 * In this example, there are 2 subgroups:
 *  1. subgroup 1 consists of processes 0 to 5 arranged as
 *     a 2-by-3 process grid.
 *  2. subgroup 2 consists of processes 6 to 9 arranged as
 *     a 2-by-2 process grid.
 *
 * With MPICH,  program may be run by typing:
 *    mpiexec -n 10 pzdrive4 big.rua
 * 
*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; SOLVEstruct_t SOLVEstruct; gridinfo_t grid1, grid2; double *berr; doublecomplex *a, *b, *xtrue; int_t *asub, *xa; int_t i, j, m, n; int nprow, npcol, ldumap, p; int_t usermap[6]; int iam, info, ldb, ldx, nprocs; int nrhs = 1; /* Number of right-hand side. */ char **cpp, c; FILE *fp, *fopen(); int cpp_defs(); /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); if ( nprocs < 10 ) { fprintf(stderr, "Requires at least 10 processes\n"); exit(-1); } /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID 1. ------------------------------------------------------------*/ nprow = 2; npcol = 3; ldumap = 2; p = 0; /* Grid 1 starts from process 0. */ for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++; superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid1); /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID 2. ------------------------------------------------------------*/ nprow = 2; npcol = 2; ldumap = 2; p = 6; /* Grid 2 starts from process 6. 
*/ for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) usermap[i+j*ldumap] = p++; superlu_gridmap(MPI_COMM_WORLD, nprow, npcol, usermap, ldumap, &grid2); /* Bail out if I do not belong in any of the 2 grids. */ MPI_Comm_rank( MPI_COMM_WORLD, &iam ); if ( iam >= 10 ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif if ( iam >= 0 && iam < 6 ) { /* I am in grid 1. */ iam = grid1.iam; /* Get the logical number in the new grid. */ /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. ------------------------------------------------------------*/ zcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid1); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = NO; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } m = A.nrow; n = A.ncol; /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pzgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid1, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pzinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, &grid1); /* Print the statistics. 
*/ PStatPrint(&options, &stat, &grid1); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); ScalePermstructFree(&ScalePermstruct); Destroy_LU(n, &grid1, &LUstruct); LUstructFree(&LUstruct); if ( options.SolveInitialized ) { zSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); } else { /* I am in grid 2. */ iam = grid2.iam; /* Get the logical number in the new grid. */ /* ------------------------------------------------------------ GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. ------------------------------------------------------------*/ zcreate_matrix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, &grid2); if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = MMD_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); m = A.nrow; n = A.ncol; /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver. */ pzgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid2, &LUstruct, &SOLVEstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ pzinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, &grid2); /* Print the statistics. 
*/ PStatPrint(&options, &stat, &grid2); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompRowLoc_Matrix_dist(&A); ScalePermstructFree(&ScalePermstruct); Destroy_LU(n, &grid2, &LUstruct); LUstructFree(&LUstruct); if ( options.SolveInitialized ) { zSolveFinalize(&options, &SOLVEstruct); } SUPERLU_FREE(b); SUPERLU_FREE(xtrue); SUPERLU_FREE(berr); } /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRIDS. ------------------------------------------------------------*/ superlu_gridexit(&grid1); superlu_gridexit(&grid2); out: /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } SuperLU_DIST_5.3.0/EXAMPLE/zreadhb.c0000644013363400111340000002073013233431301015514 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format * *
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * 
*/ #include "dcomplex.h" #include #include #include "superlu_zdefs.h" /* * Prototypes */ static void ReadVector(FILE *, int_t, int_t *, int_t, int_t); static void zReadValues(FILE *, int_t, doublecomplex *, int_t, int_t); static int DumpLine(FILE *); static int ParseIntFormat(char *, int_t *, int_t *); static int ParseFloatFormat(char *, int_t *, int_t *); /*! \brief * *
 * Purpose
 * =======
 * 
 * Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format 
 * as described below.
 * 
 * Line 1 (A72,A8) 
 *  	Col. 1 - 72   Title (TITLE) 
 *	Col. 73 - 80  Key (KEY) 
 * 
 * Line 2 (5I14) 
 * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
 * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
 * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
 * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
 *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
 *                    (including starting guesses and solution vectors 
 *		       if present) 
 *           	      (zero indicates no right-hand side data is present) 
 *
 * Line 3 (A3, 11X, 4I14) 
 *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
 * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
 * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
 *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
 *	              (equal to number of entries for assembled matrices) 
 * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
 *	              (zero in the case of assembled matrices) 
 * Line 4 (2A16, 2A20) 
 * 	Col. 1 - 16   Format for pointers (PTRFMT) 
 *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
 *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
 * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
 *
 * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
 *    	Col. 1 	      Right-hand side type: 
 *	         	  F for full storage or M for same format as matrix 
 *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
 *    	Col. 3        X if an exact solution vector(s) is supplied. 
 *	Col. 15 - 28  Number of right-hand sides (NRHS) 
 *	Col. 29 - 42  Number of row indices (NRHSIX) 
 *          	      (ignored in case of unassembled matrices) 
 *
 * The three character type field on line 3 describes the matrix type. 
 * The following table lists the permitted values for each of the three 
 * characters. As an example of the type field, RSA denotes that the matrix 
 * is real, symmetric, and assembled. 
 *
 * First Character: 
 *	R Real matrix 
 *	C Complex matrix 
 *	P Pattern only (no numerical values supplied) 
 *
 * Second Character: 
 *	S Symmetric 
 *	U Unsymmetric 
 *	H Hermitian 
 *	Z Skew symmetric 
 *	R Rectangular 
 *
 * Third Character: 
 *	A Assembled 
 *	E Elemental matrices (unassembled) 
 * 
*/ void zreadhb_dist(int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) { register int_t i, numer_lines, rhscrd = 0; int_t tmp, colnum, colsize, rownum, rowsize, valnum, valsize; char buf[100], type[4]; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Enter zreadhb_dist()"); #endif /* Line 1 */ fgets(buf, 100, fp); /* Line 2 */ for (i=0; i<5; i++) { fscanf(fp, "%14c", buf); buf[14] = 0; tmp = atoi(buf); /*sscanf(buf, "%d", &tmp);*/ if (i == 3) numer_lines = tmp; if (i == 4 && tmp) rhscrd = tmp; } DumpLine(fp); /* Line 3 */ fscanf(fp, "%3c", type); fscanf(fp, "%11c", buf); /* pad */ type[3] = 0; #if ( DEBUGlevel>=1 ) if ( !iam ) printf("Matrix type %s\n", type); #endif fscanf(fp, "%14c", buf); *nrow = atoi(buf); fscanf(fp, "%14c", buf); *ncol = atoi(buf); fscanf(fp, "%14c", buf); *nonz = atoi(buf); fscanf(fp, "%14c", buf); tmp = atoi(buf); if (tmp != 0) if ( !iam ) printf("This is not an assembled matrix!\n"); if (*nrow != *ncol) if ( !iam ) printf("Matrix is not square.\n"); DumpLine(fp); /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */ zallocateA_dist(*ncol, *nonz, nzval, rowind, colptr); /* Line 4: format statement */ fscanf(fp, "%16c", buf); ParseIntFormat(buf, &colnum, &colsize); fscanf(fp, "%16c", buf); ParseIntFormat(buf, &rownum, &rowsize); fscanf(fp, "%20c", buf); ParseFloatFormat(buf, &valnum, &valsize); fscanf(fp, "%20c", buf); DumpLine(fp); /* Line 5: right-hand side */ if ( rhscrd ) DumpLine(fp); /* skip RHSFMT */ #if ( DEBUGlevel>=1 ) if ( !iam ) { printf("%d rows, %d nonzeros\n", *nrow, *nonz); printf("colnum %d, colsize %d\n", colnum, colsize); printf("rownum %d, rowsize %d\n", rownum, rowsize); printf("valnum %d, valsize %d\n", valnum, valsize); } #endif ReadVector(fp, *ncol+1, *colptr, colnum, colsize); #if ( DEBUGlevel>=1 ) if ( !iam ) printf("read colptr[%d] = %d\n", *ncol, (*colptr)[*ncol]); #endif ReadVector(fp, *nonz, *rowind, rownum, rowsize); #if ( DEBUGlevel>=1 ) if ( 
!iam ) printf("read rowind[%d] = %d\n", *nonz-1, (*rowind)[*nonz-1]); #endif if ( numer_lines ) { zReadValues(fp, *nonz, *nzval, valnum, valsize); } fclose(fp); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(0, "Exit zreadhb_dist()"); #endif } /* Eat up the rest of the current line */ static int DumpLine(FILE *fp) { register int c; while ((c = fgetc(fp)) != '\n') ; return 0; } static int ParseIntFormat(char *buf, int_t *num, int_t *size) { char *tmp; tmp = buf; while (*tmp++ != '(') ; *num = atoi(tmp); while (*tmp != 'I' && *tmp != 'i') ++tmp; ++tmp; *size = atoi(tmp); return 0; } static int ParseFloatFormat(char *buf, int_t *num, int_t *size) { char *tmp, *period; tmp = buf; while (*tmp++ != '(') ; *num = atoi(tmp); while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd' && *tmp != 'F' && *tmp != 'f') { /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the num picked up refers to P, which should be skipped. */ if (*tmp=='p' || *tmp=='P') { ++tmp; *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/ } else { ++tmp; } } ++tmp; period = tmp; while (*period != '.' && *period != ')') ++period ; *period = '\0'; *size = atoi(tmp); return 0; } static void ReadVector(FILE *fp, int_t n, int_t *where, int_t perline, int_t persize) { register int_t i, j, item; char tmp, buf[100]; i = 0; while (i < n) { fgets(buf, 100, fp); /* read a line at a time */ for (j=0; j Purpose ======= sp_ienv_dist() is inquired to choose machine-dependent parameters for the local environment. See ISPEC for a description of the parameters. This version provides a set of parameters which should give good, but not optimal, performance on many of the currently available computers. Users are encouraged to modify this subroutine to set the tuning parameters for their particular machine using the option and problem size information in the arguments. Arguments ========= ISPEC (input) int Specifies the parameter to be returned as the value of SP_IENV_DIST. 
= 1: the panel size w; a panel consists of w consecutive columns of matrix A in the process of Gaussian elimination. The best value depends on machine's cache characters. = 2: the relaxation parameter relax; if the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of the their row structures. = 3: the maximum size for a supernode, which must be greater than or equal to relaxation parameter (see case 2); = 4: the minimum row dimension for 2-D blocking to be used; = 5: the minimum column dimension for 2-D blocking to be used; = 6: the estimated fills factor for the adjacency structures of L and U, compared with A; = 7: the minimum value of the product M*N*K for a GEMM call to be off-loaded to accelerator (e.g., GPU, Xeon Phi). (SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC < 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value. =====================================================================
*/ #include #include int_t sp_ienv_dist(int_t ispec) { // printf(" this function called\n"); int i; char* ttemp; switch (ispec) { #if ( MACH==CRAY_T3E ) case 2: return (6); case 3: return (30); #elif ( MACH==IBM ) case 2: return (20); case 3: return (100); #else case 2: ttemp = getenv("NREL"); if(ttemp) { return(atoi(ttemp)); } else return 20; case 3: ttemp = getenv("NSUP"); if(ttemp) { return(atoi(ttemp)); } else return 128; #endif case 6: ttemp = getenv("FILL"); if ( ttemp ) return(atoi(ttemp)); else return (5); case 7: ttemp = getenv ("N_GEMM"); if (ttemp) return atoi (ttemp); else return 10000; } /* Invalid value for ISPEC */ i = 1; xerr_dist("sp_ienv", &i); return 0; } /* sp_ienv_dist */ SuperLU_DIST_5.3.0/EXAMPLE/pzgstrs_lsum_Bsend.c0000644013363400111340000003215113233431301017764 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Performs block modifications * *
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 * Modified:
 *     February 7, 2001    use MPI_Isend/MPI_Irecv
 *     October 2, 2001     use MPI_Isend/MPI_Irecv with MPI_Test
 * 
*/ #include "superlu_zdefs.h" #if 0 #define ISEND_IRECV #else #define BSEND #endif /* * Function prototypes */ #ifdef _CRAY fortran void CTRSM(_fcd, _fcd, _fcd, _fcd, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*); fortran void CGEMM(_fcd, _fcd, int*, int*, int*, doublecomplex*, doublecomplex*, int*, doublecomplex*, int*, doublecomplex*, doublecomplex*, int*); _fcd ftcs1; _fcd ftcs2; _fcd ftcs3; #endif /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   Perform local block modifications: lsum[i] -= L_i,k * X[k].
 * 
*/ void zlsum_fmod /************************************************************************/ ( doublecomplex *lsum, /* Sum of local modifications. */ doublecomplex *x, /* X array (local) */ doublecomplex *xk, /* X[k]. */ doublecomplex *rtemp, /* Result of full matrix-vector multiply. */ int nrhs, /* Number of right-hand sides. */ int knsupc, /* Size of supernode k. */ int_t k, /* The k-th component of X. */ int_t *fmod, /* Modification count for L-solve. */ int_t nlb, /* Number of L blocks. */ int_t lptr, /* Starting position in lsub[*]. */ int_t luptr, /* Starting position in lusup[*]. */ int_t *xsup, gridinfo_t *grid, LocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat ) { doublecomplex alpha = {1.0, 0.0}, beta = {0.0, 0.0}; doublecomplex *lusup, *lusup1; doublecomplex *dest; int iam, iknsupc, myrow, nbrow, nsupr, nsupr1, p, pi; int_t i, ii, ik, il, ikcol, irow, j, lb, lk, rel; int_t *lsub, *lsub1, nlb1, lptr1, luptr1; int_t *ilsum = Llu->ilsum; /* Starting position of each supernode in lsum. */ int_t *frecv = Llu->frecv; int_t **fsendx_plist = Llu->fsendx_plist; MPI_Status status; int test_flag; iam = grid->iam; myrow = MYROW( iam, grid ); lk = LBj( k, grid ); /* Local block number, column-wise. */ lsub = Llu->Lrowind_bc_ptr[lk]; lusup = Llu->Lnzval_bc_ptr[lk]; nsupr = lsub[1]; for (lb = 0; lb < nlb; ++lb) { ik = lsub[lptr]; /* Global block number, row-wise. */ nbrow = lsub[lptr+1]; #ifdef _CRAY CGEMM( ftcs2, ftcs2, &nbrow, &nrhs, &knsupc, &alpha, &lusup[luptr], &nsupr, xk, &knsupc, &beta, rtemp, &nbrow ); #else zgemm_( "N", "N", &nbrow, &nrhs, &knsupc, &alpha, &lusup[luptr], &nsupr, xk, &knsupc, &beta, rtemp, &nbrow ); #endif stat->ops[SOLVE] += 8 * nbrow * nrhs * knsupc + 2 * nbrow * nrhs; lk = LBi( ik, grid ); /* Local block number, row-wise. */ iknsupc = SuperSize( ik ); il = LSUM_BLK( lk ); dest = &lsum[il]; lptr += LB_DESCRIPTOR; rel = xsup[ik]; /* Global row index of block ik. 
*/ for (i = 0; i < nbrow; ++i) { irow = lsub[lptr++] - rel; /* Relative row. */ RHS_ITERATE(j) z_sub(&dest[irow + j*iknsupc], &dest[irow + j*iknsupc], &rtemp[i + j*nbrow]); } luptr += nbrow; if ( (--fmod[lk])==0 ) { /* Local accumulation done. */ ikcol = PCOL( ik, grid ); p = PNUM( myrow, ikcol, grid ); if ( iam != p ) { #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[myrow], &test_flag, &status ); #else if ( send_req[myrow] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[myrow], &status ); #endif MPI_Isend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm, &send_req[myrow] ); #else #ifdef BSEND MPI_Bsend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm ); #else MPI_Send( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent LSUM[%2.0f], size %2d, to P %2d\n", iam, lsum[il-LSUM_H], iknsupc*nrhs+LSUM_H, p); #endif } else { /* Diagonal process: X[i] += lsum[i]. */ ii = X_BLK( lk ); RHS_ITERATE(j) for (i = 0; i < iknsupc; ++i) z_add(&x[i + ii + j*iknsupc], &x[i + ii + j*iknsupc], &lsum[i + il + j*iknsupc]); if ( frecv[lk]==0 ) { /* Becomes a leaf node. */ fmod[lk] = -1; /* Do not solve X[k] in the future. */ lk = LBj( ik, grid );/* Local block number, column-wise. */ lsub1 = Llu->Lrowind_bc_ptr[lk]; lusup1 = Llu->Lnzval_bc_ptr[lk]; nsupr1 = lsub1[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &iknsupc, &nrhs, &alpha, lusup1, &nsupr1, &x[ii], &iknsupc); #else ztrsm_("L", "L", "N", "U", &iknsupc, &nrhs, &alpha, lusup1, &nsupr1, &x[ii], &iknsupc); #endif stat->ops[SOLVE] += 4 * iknsupc * (iknsupc - 1) * nrhs + 10 * knsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, ik); #endif /* * Send Xk to process column Pc[k]. 
*/ for (p = 0; p < grid->nprow; ++p) if ( fsendx_plist[lk][p] != EMPTY ) { pi = PNUM( p, ikcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[p] ); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications. */ nlb1 = lsub1[0] - 1; lptr1 = BC_HEADER + LB_DESCRIPTOR + iknsupc; luptr1 = iknsupc; /* Skip diagonal block L(I,I). */ zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, iknsupc, ik, fmod, nlb1, lptr1, luptr1, xsup, grid, Llu, send_req, stat); #ifdef ISEND_IRECV /* Wait for previous Isends to complete. */ for (p = 0; p < grid->nprow; ++p) { if ( fsendx_plist[lk][p] != EMPTY ) /*MPI_Wait( &send_req[p], &status );*/ MPI_Test( &send_req[p], &test_flag, &status ); } #endif } /* if frecv[lk] == 0 */ } /* if iam == p */ } /* if fmod[lk] == 0 */ } /* for lb ... */ } /* zLSUM_FMOD */ /************************************************************************/ /*! \brief * *
 * Purpose
 * =======
 *   Perform local block modifications: lsum[i] -= U_i,k * X[k].
 * 
*/ void zlsum_bmod /************************************************************************/ ( doublecomplex *lsum, /* Sum of local modifications. */ doublecomplex *x, /* X array (local). */ doublecomplex *xk, /* X[k]. */ int nrhs, /* Number of right-hand sides. */ int_t k, /* The k-th component of X. */ int_t *bmod, /* Modification count for L-solve. */ int_t *Urbs, /* Number of row blocks in each block column of U.*/ Ucb_indptr_t **Ucb_indptr,/* Vertical linked list pointing to Uindex[].*/ int_t **Ucb_valptr, /* Vertical linked list pointing to Unzval[]. */ int_t *xsup, gridinfo_t *grid, LocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat ) { doublecomplex alpha = {1.0, 0.0}; int iam, iknsupc, knsupc, myrow, nsupr, p, pi; int_t fnz, gik, gikcol, i, ii, ik, ikfrow, iklrow, il, irow, j, jj, lk, lk1, nub, ub, uptr; int_t *usub; doublecomplex *uval, *dest, *y; doublecomplex temp; int_t *lsub; doublecomplex *lusup; int_t *ilsum = Llu->ilsum; /* Starting position of each supernode in lsum. */ int_t *brecv = Llu->brecv; int_t **bsendx_plist = Llu->bsendx_plist; MPI_Status status; int test_flag; iam = grid->iam; myrow = MYROW( iam, grid ); knsupc = SuperSize( k ); lk = LBj( k, grid ); /* Local block number, column-wise. */ nub = Urbs[lk]; for (ub = 0; ub < nub; ++ub) { ik = Ucb_indptr[lk][ub].lbnum; /* Local block number, row-wise. */ usub = Llu->Ufstnz_br_ptr[ik]; uval = Llu->Unzval_br_ptr[ik]; i = Ucb_indptr[lk][ub].indpos; /* Start of the block in usub[]. */ i += UB_DESCRIPTOR; il = LSUM_BLK( ik ); gik = ik * grid->nprow + myrow; /* Global block number, row-wise. */ iknsupc = SuperSize( gik ); ikfrow = FstBlockC( gik ); iklrow = FstBlockC( gik+1 ); RHS_ITERATE(j) { dest = &lsum[il + j*iknsupc]; y = &xk[j*knsupc]; uptr = Ucb_valptr[lk][ub]; /* Start of the block in uval[]. */ for (jj = 0; jj < knsupc; ++jj) { fnz = usub[i + jj]; if ( fnz < iklrow ) { /* Nonzero segment. 
*/ /* AXPY */ for (irow = fnz; irow < iklrow; ++irow) { zz_mult(&temp, &uval[uptr], &y[jj]); z_sub(&dest[irow - ikfrow], &dest[irow - ikfrow], &temp); ++uptr; } stat->ops[SOLVE] += 8 * (iklrow - fnz); } } /* for jj ... */ } --bmod[ik]; if ( !(bmod[ik]) ) { /* Local accumulation done. */ gikcol = PCOL( gik, grid ); p = PNUM( myrow, gikcol, grid ); if ( iam != p ) { #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[myrow], &test_flag, &status ); #else if ( send_req[myrow] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[myrow], &status ); #endif MPI_Isend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm, &send_req[myrow] ); #else #ifdef BSEND MPI_Bsend( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm ); #else MPI_Send( &lsum[il - LSUM_H], iknsupc * nrhs + LSUM_H, SuperLU_MPI_DOUBLE_COMPLEX, p, LSUM, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent LSUM[%2.0f], size %2d, to P %2d\n", iam, lsum[il-LSUM_H], iknsupc*nrhs+LSUM_H, p); #endif } else { /* Diagonal process: X[i] += lsum[i]. */ ii = X_BLK( ik ); dest = &x[ii]; RHS_ITERATE(j) for (i = 0; i < iknsupc; ++i) z_add(&dest[i + j*iknsupc], &dest[i + j*iknsupc], &lsum[i + il + j*iknsupc]); if ( !brecv[ik] ) { /* Becomes a leaf node. */ bmod[ik] = -1; /* Do not solve X[k] in the future. */ lk1 = LBj( gik, grid ); /* Local block number. */ lsub = Llu->Lrowind_bc_ptr[lk1]; lusup = Llu->Lnzval_bc_ptr[lk1]; nsupr = lsub[1]; #ifdef _CRAY CTRSM(ftcs1, ftcs3, ftcs2, ftcs2, &iknsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &iknsupc); #else ztrsm_("L", "U", "N", "N", &iknsupc, &nrhs, &alpha, lusup, &nsupr, &x[ii], &iknsupc); #endif stat->ops[SOLVE] += 4 * iknsupc * (iknsupc + 1) * nrhs + 10 * iknsupc * nrhs; /* complex division */ #if ( DEBUGlevel>=2 ) printf("(%2d) Solve X[%2d]\n", iam, gik); #endif /* * Send Xk to process column Pc[k]. 
*/ for (p = 0; p < grid->nprow; ++p) if ( bsendx_plist[lk1][p] != EMPTY ) { pi = PNUM( p, gikcol, grid ); #ifdef ISEND_IRECV #if 1 MPI_Test( &send_req[p], &test_flag, &status ); #else if ( send_req[p] != MPI_REQUEST_NULL ) MPI_Wait( &send_req[p], &status ); #endif MPI_Isend( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm, &send_req[p] ); #else #ifdef BSEND MPI_Bsend( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #else MPI_Send( &x[ii - XK_H], iknsupc * nrhs + XK_H, SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm ); #endif #endif #if ( DEBUGlevel>=2 ) printf("(%2d) Sent X[%2.0f] to P %2d\n", iam, x[ii-XK_H], pi); #endif } /* * Perform local block modifications. */ if ( Urbs[lk1] ) zlsum_bmod(lsum, x, &x[ii], nrhs, gik, bmod, Urbs, Ucb_indptr, Ucb_valptr, xsup, grid, Llu, send_req, stat); #ifdef ISEND_IRECV /* Wait for the previous Isends to complete. */ for (p = 0; p < grid->nprow; ++p) { if ( bsendx_plist[lk1][p] != EMPTY ) /*MPI_Wait( &send_req[p], &status );*/ MPI_Test( &send_req[p], &test_flag, &status ); } #endif } /* if brecv[ik] == 0 */ } } /* if bmod[ik] == 0 */ } /* for ub ... */ } /* zlSUM_BMOD */ SuperLU_DIST_5.3.0/EXAMPLE/pddrive2_ABglobal.c0000644013363400111340000002320313233431301017335 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Driver program for pdgssvx_ABglobal example * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 * April 5, 2015
 * 
*/ #include #include "superlu_ddefs.h" /*! \brief * *
 * Purpose
 * =======
 *
 * The driver program pddrive2_ABglobal.
 *
 * This example illustrates how to use pdgssvx_ABglobal to solve
 * systems repeatedly with the same sparsity pattern of matrix A.
 * In this case, the column permutation vector ScalePermstruct->perm_c is
 * computed once.  The following data structures will be reused in the
 * subsequent call to pdgssvx_ABglobal:
 *        ScalePermstruct : perm_c
 *        LUstruct        : etree
 *
 * On an IBM SP, the program may be run by typing:
 *    poe pddrive2_ABglobal -r <proc rows> -c <proc columns> <input_file> -procs <p>

*

*/ int main(int argc, char *argv[]) { superlu_dist_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t ScalePermstruct; LUstruct_t LUstruct; gridinfo_t grid; double *berr; double *a, *a1, *b, *b1, *xtrue; int_t *asub, *asub1, *xa, *xa1; int_t i, j, m, n, nnz; int_t nprow, npcol; int iam, info, ldb, ldx, nrhs; char trans[1]; char **cpp, c; FILE *fp, *fopen(); extern int cpp_defs(); /* prototypes */ extern void LUstructInit(const int_t, LUstruct_t *); extern void LUstructFree(LUstruct_t *); extern void Destroy_LU(int_t, gridinfo_t *, LUstruct_t *); nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. ------------------------------------------------------------*/ MPI_Init( &argc, &argv ); /* Parse command line argv[]. */ for (cpp = argv+1; *cpp; ++cpp) { if ( **cpp == '-' ) { c = *(*cpp+1); ++cpp; switch (c) { case 'h': printf("Options:\n"); printf("\t-r : process rows (default %d)\n", nprow); printf("\t-c : process columns (default %d)\n", npcol); exit(0); break; case 'r': nprow = atoi(*cpp); break; case 'c': npcol = atoi(*cpp); break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { ABORT("File does not exist"); } break; } } /* ------------------------------------------------------------ INITIALIZE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, &grid); /* Bail out if I do not belong in the grid. */ iam = grid.iam; if ( iam >= nprow * npcol ) goto out; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter main()"); #endif /* ------------------------------------------------------------ Process 0 reads the matrix A, and then broadcasts it to all the other processes. ------------------------------------------------------------*/ if ( !iam ) { /* Print the CPP definitions. 
*/ cpp_defs(); /* Read the matrix stored on disk in Harwell-Boeing format. */ dreadhb_dist(iam, fp, &m, &n, &nnz, &a, &asub, &xa); printf("Input matrix file: %s\n", *cpp); printf("\tDimension\t%dx%d\t # nonzeros %d\n", m, n, nnz); printf("\tProcess grid\t%d X %d\n", (int) grid.nprow, (int) grid.npcol); /* Broadcast matrix A to the other PEs. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid.comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid.comm ); /* Allocate storage for compressed column representation. */ dallocateA_dist(n, nnz, &a, &asub, &xa); MPI_Bcast( a, nnz, MPI_DOUBLE, 0, grid.comm ); MPI_Bcast( asub, nnz, mpi_int_t, 0, grid.comm ); MPI_Bcast( xa, n+1, mpi_int_t, 0, grid.comm ); } /* Create compressed column matrix for A. */ dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); /* Generate the exact solution and compute the right-hand side. */ if (!(b=doubleMalloc_dist(m * nrhs))) ABORT("Malloc fails for b[]"); if (!(xtrue=doubleMalloc_dist(n*nrhs))) ABORT("Malloc fails for xtrue[]"); *trans = 'N'; ldx = n; ldb = m; dGenXtrue_dist(n, nrhs, xtrue, ldx); dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb); /* Save a copy of the right-hand side. */ if ( !(b1 = doubleMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]"); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb]; if ( !(berr = doubleMalloc_dist(nrhs)) ) ABORT("Malloc fails for berr[]."); /* Save a copy of the matrix A. 
*/ dallocateA_dist(n, nnz, &a1, &asub1, &xa1); for (i = 0; i < nnz; ++i) { a1[i] = a[i]; asub1[i] = asub[i]; } for (i = 0; i < n+1; ++i) xa1[i] = xa[i]; /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME. ------------------------------------------------------------*/ /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = METIS_AT_PLUS_A; options.RowPerm = LargeDiag; options.ReplaceTinyPivot = YES; options.Trans = NOTRANS; options.IterRefine = DOUBLE; options.SolveInitialized = NO; options.RefineInitialized = NO; options.PrintStat = YES; */ set_default_options_dist(&options); if (!iam) { print_sp_ienv_dist(&options); print_options_dist(&options); } /* Initialize ScalePermstruct and LUstruct. */ ScalePermstructInit(m, n, &ScalePermstruct); LUstructInit(n, &LUstruct); /* Initialize the statistics variables. */ PStatInit(&stat); /* Call the linear equation solver: factorize and solve. */ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { dinf_norm_error_dist(n, nrhs, b, ldb, xtrue, ldx, &grid); } PStatPrint(&options, &stat, &grid); /* Print the statistics. */ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ SUPERLU_FREE(b); /* Free storage of right-hand side. */ /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM. ONLY THE SPARSITY PATTERN OF MATRIX A IS THE SAME. ------------------------------------------------------------*/ options.Fact = SamePattern; PStatInit(&stat); /* Initialize the statistics variables. */ /* Create compressed column matrix for A. */ dCreate_CompCol_Matrix_dist(&A, m, n, nnz, a1, asub1, xa1, SLU_NC, SLU_D, SLU_GE); /* Solve the linear system. 
*/ pdgssvx_ABglobal(&options, &A, &ScalePermstruct, b1, ldb, nrhs, &grid, &LUstruct, berr, &stat, &info); /* Check the accuracy of the solution. */ if ( !iam ) { printf("Solve the system with the same sparsity pattern.\n"); dinf_norm_error_dist(n, nrhs, b1, ldb, xtrue, ldx, &grid); } /* Print the statistics. */ PStatPrint(&options, &stat, &grid); /* ------------------------------------------------------------ DEALLOCATE STORAGE. ------------------------------------------------------------*/ PStatFree(&stat); Destroy_CompCol_Matrix_dist(&A); /* Deallocate storage of matrix A. */ Destroy_LU(n, &grid, &LUstruct); /* Deallocate storage associated with the L and U matrices. */ ScalePermstructFree(&ScalePermstruct); LUstructFree(&LUstruct); /* Deallocate the structure of L and U.*/ SUPERLU_FREE(b1); /* Free storage of right-hand side. */ SUPERLU_FREE(xtrue); /* Free storage of the exact solution. */ SUPERLU_FREE(berr); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. ------------------------------------------------------------*/ out: superlu_gridexit(&grid); /* ------------------------------------------------------------ TERMINATES THE MPI EXECUTION ENVIRONMENT. ------------------------------------------------------------*/ MPI_Finalize(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Exit main()"); #endif } int cpp_defs() { printf(".. 
CPP definitions:\n"); #if ( PRNTlevel>=1 ) printf("\tPRNTlevel = %d\n", PRNTlevel); #endif #if ( DEBUGlevel>=1 ) printf("\tDEBUGlevel = %d\n", DEBUGlevel); #endif #if ( PROFlevel>=1 ) printf("\tPROFlevel = %d\n", PROFlevel); #endif #if ( StaticPivot>=1 ) printf("\tStaticPivot = %d\n", StaticPivot); #endif printf("....\n"); return 0; } SuperLU_DIST_5.3.0/FORTRAN/0000755013363400111340000000000013233431301013747 5ustar xiaoyessgSuperLU_DIST_5.3.0/FORTRAN/Makefile0000644013363400111340000000243413233431301015412 0ustar xiaoyessg####################################################################### # # This makefile creates the Fortran example programs for the # linear equation routines in SuperLU_DIST. # # Creation date: July 29, 2003 version 2.0 # Modified: Oct. 22, 2012 version 3.2 # ####################################################################### .SUFFIXES: .SUFFIXES: .f90 .c .o include ../make.inc INCLUDEDIR = -I../SRC #F90FLAGS = $(FFLAGS) -qfree -qsuffix=f=f90 -qflag=w:w F_MOD = superlupara.o superlu_mod.o C_DWRAP = dcreate_dist_matrix.o superlu_c2f_dwrap.o C_ZWRAP = zcreate_dist_matrix.o superlu_c2f_zwrap.o F_DEXM = $(F_MOD) dhbcode1.o f_pddrive.o F_ZEXM = $(F_MOD) zhbcode1.o f_pzdrive.o F_5x5 = $(F_MOD) f_5x5.o sp_ienv.o all: f_pddrive f_pzdrive f_5x5 f_pddrive: $(F_DEXM) $(C_DWRAP) $(DSUPERLULIB) $(FORTRAN) $(LOADOPTS) $(F_DEXM) $(C_DWRAP) $(LIBS) -o $@ f_5x5: $(F_5x5) $(C_DWRAP) $(DSUPERLULIB) $(FORTRAN) $(LOADOPTS) $(F_5x5) $(C_DWRAP) $(LIBS) -o $@ f_pzdrive: $(F_ZEXM) $(C_ZWRAP) $(DSUPERLULIB) $(FORTRAN) $(LOADOPTS) $(F_ZEXM) $(C_ZWRAP) $(LIBS) -o $@ .c.o: $(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) $(INCLUDEDIR) -c $< $(VERBOSE) .f90.o: $(FORTRAN) $(F90FLAGS) -c $< $(VERBOSE) .f.o: $(FORTRAN) $(FFLAGS) -c $< $(VERBOSE) clean: rm -f *.o *.mod f_*drive f_5x5 SuperLU_DIST_5.3.0/FORTRAN/superlu_c2f_zwrap.c0000644013363400111340000002316413233431301017575 0ustar xiaoyessg /*! @file * \brief C interface functions for the Fortran90 wrapper. * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 2012
 * April 5, 2015
 */

#include "superlu_zdefs.h"
#include "Cnames.h"

/* Kind of integer used to hold a C pointer on the Fortran side.
   The 64-bit variant (long long int) is selected below; the 32-bit
   variant (int) is only adequate on systems with 32-bit pointers.
   If changed, be sure to change superlu_ptr in superlupara.f90 too. */

#if 0
typedef int fptr;  /* 32-bit */
#else
typedef long long int fptr;  /* 64-bit */
#endif


/* some MPI implementations may require conversion between a Fortran
   communicator and a C communicator.  This routine is used to perform the
   conversion.  It may need different forms for different MPI libraries. */

/* NO_MPI2 should be defined on the compiler command line if the MPI
   library does not provide MPI_Comm_f2c */

MPI_Comm f2c_comm(int *f_comm)
{
   /* Translate the Fortran communicator integer pointed to by f_comm
      into a C MPI_Comm and return it. */
   MPI_Comm c_comm;

#ifdef NO_MPI2
   /* No MPI_Comm_f2c available: fall back to a plain cast of the
      Fortran value.  Special cases for particular MPI libraries
      would go here. */
   c_comm = (MPI_Comm)(*f_comm);
#else
   /* MPI 2 provides a standard conversion routine. */
   c_comm = MPI_Comm_f2c((MPI_Fint)(*f_comm));
#endif

   return c_comm;
}


/* functions that create memory for a struct and return a handle */

void f_create_gridinfo_handle(fptr *handle)
{
   /* Allocate storage for one gridinfo_t and return its address to
      Fortran as an integer handle. */
   gridinfo_t *grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));

   *handle = (fptr) grid;
}

void f_create_options_handle(fptr *handle)
{
   /* Allocate storage for one superlu_dist_options_t and return its
      address to Fortran as an integer handle. */
   superlu_dist_options_t *opts =
       (superlu_dist_options_t *) SUPERLU_MALLOC(sizeof(superlu_dist_options_t));

   *handle = (fptr) opts;
}

void f_create_ScalePerm_handle(fptr *handle)
{
   /* Allocate storage for one ScalePermstruct_t and return its address
      to Fortran as an integer handle. */
   ScalePermstruct_t *sp =
       (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t));

   *handle = (fptr) sp;
}

void f_create_LUstruct_handle(fptr *handle)
{
   /* Allocate storage for one LUstruct_t and return its address to
      Fortran as an integer handle. */
   LUstruct_t *lu = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));

   *handle = (fptr) lu;
}

void f_create_SOLVEstruct_handle(fptr *handle)
{
   /* Allocate storage for one SOLVEstruct_t and return its address to
      Fortran as an integer handle. */
   SOLVEstruct_t *solve =
       (SOLVEstruct_t *) SUPERLU_MALLOC(sizeof(SOLVEstruct_t));

   *handle = (fptr) solve;
}

void f_create_SuperMatrix_handle(fptr *handle)
{
   /* Allocate storage for one SuperMatrix and return its address to
      Fortran as an integer handle. */
   SuperMatrix *mat = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));

   *handle = (fptr) mat;
}

void f_create_SuperLUStat_handle(fptr *handle)
{
   /* Allocate storage for one SuperLUStat_t and return its address to
      Fortran as an integer handle. */
   SuperLUStat_t *st =
       (SuperLUStat_t *) SUPERLU_MALLOC(sizeof(SuperLUStat_t));

   *handle = (fptr) st;
}

/* functions that free the memory allocated by the above functions */

void f_destroy_gridinfo_handle(fptr *handle)
{
   /* Decode the handle back into a pointer and release the struct
      memory.  The handle value itself is left untouched. */
   void *mem = (void *) *handle;

   SUPERLU_FREE(mem);
}

void f_destroy_options_handle(fptr *handle)
{
   /* Decode the handle back into a pointer and release the struct
      memory.  The handle value itself is left untouched. */
   void *mem = (void *) *handle;

   SUPERLU_FREE(mem);
}

void f_destroy_ScalePerm_handle(fptr *handle)
{
   /* Decode the handle back into a pointer and release the struct
      memory.  The handle value itself is left untouched. */
   void *mem = (void *) *handle;

   SUPERLU_FREE(mem);
}

void f_destroy_LUstruct_handle(fptr *handle)
{
   /* Decode the handle back into a pointer and release the struct
      memory.  The handle value itself is left untouched. */
   void *mem = (void *) *handle;

   SUPERLU_FREE(mem);
}

void f_destroy_SOLVEstruct_handle(fptr *handle)
{
   /* Decode the handle back into a pointer and release the struct
      memory.  The handle value itself is left untouched. */
   void *mem = (void *) *handle;

   SUPERLU_FREE(mem);
}

void f_destroy_SuperMatrix_handle(fptr *handle)
{
   /* Decode the handle back into a pointer and release the struct
      memory.  The handle value itself is left untouched. */
   void *mem = (void *) *handle;

   SUPERLU_FREE(mem);
}

void f_destroy_SuperLUStat_handle(fptr *handle)
{
   /* Decode the handle back into a pointer and release the struct
      memory.  The handle value itself is left untouched. */
   void *mem = (void *) *handle;

   SUPERLU_FREE(mem);
}

/* functions that get or set values in a C struct.
   This is not the complete set of structs for which a user might want
   to get/set a component, and there may be missing components. */

void f_get_gridinfo(fptr *grid, int *iam, int_t *nprow, int_t *npcol)
{
  /* Decode the grid handle once, then copy the process rank and the
     grid dimensions out to the caller. */
  gridinfo_t *g = (gridinfo_t *) *grid;

  *iam = g->iam;
  *npcol = g->npcol;
  *nprow = g->nprow;
}

void f_get_SuperMatrix(fptr *A, int_t *nrow, int_t *ncol)
{
   /* Copy the row and column counts stored in the SuperMatrix behind
      handle A out to the caller. */
   SuperMatrix *mat = (SuperMatrix *) *A;

   *nrow = mat->nrow;
   *ncol = mat->ncol;
}

void f_set_SuperMatrix(fptr *A, int_t *nrow, int_t *ncol)
{
   /* Overwrite the row and column counts stored in the SuperMatrix
      behind handle A with the caller's values. */
   SuperMatrix *mat = (SuperMatrix *) *A;

   mat->nrow = *nrow;
   mat->ncol = *ncol;
}

void f_get_CompRowLoc_Matrix(fptr *A, int_t *m, int_t *n, int_t *nnz_loc,
			     int_t *m_loc, int_t *fst_row)
{
  /* Read the global dimensions from the SuperMatrix behind handle A,
     and the local row count, local nonzero count and first local row
     from its Store, which is accessed as an NRformat_loc. */
  SuperMatrix *mat = (SuperMatrix *) *A;
  NRformat_loc *store;

  *m = mat->nrow;
  *n = mat->ncol;

  store = (NRformat_loc *) mat->Store;
  *m_loc = store->m_loc;
  *nnz_loc = store->nnz_loc;
  *fst_row = store->fst_row;
}

void f_set_CompRowLoc_Matrix(fptr *A, int_t *m, int_t *n, int_t *nnz_loc,
			     int_t *m_loc, int_t *fst_row)
{
  /* Write the global dimensions into the SuperMatrix behind handle A,
     and the local row count, local nonzero count and first local row
     into its Store, which is accessed as an NRformat_loc. */
  SuperMatrix *mat = (SuperMatrix *) *A;
  NRformat_loc *store;

  mat->nrow = *m;
  mat->ncol = *n;

  store = (NRformat_loc *) mat->Store;
  store->m_loc = *m_loc;
  store->nnz_loc = *nnz_loc;
  store->fst_row = *fst_row;
}

void f_get_superlu_options(fptr *opt, int *Fact, int *Equil, int *ParSymbFact,
                           int *ColPerm, int *RowPerm, int *IterRefine,
			   int *Trans, int *ReplaceTinyPivot,
			   int *SolveInitialized, int *RefineInitialized,
			   int *PrintStat)
{
   /* Copy each option field of the options struct behind handle opt
      into the matching output argument, converted to int. */
   superlu_dist_options_t *o = (superlu_dist_options_t *) *opt;

   *Fact = (int) o->Fact;
   *Equil = (int) o->Equil;
   *ParSymbFact = (int) o->ParSymbFact;
   *ColPerm = (int) o->ColPerm;
   *RowPerm = (int) o->RowPerm;
   *IterRefine = (int) o->IterRefine;
   *Trans = (int) o->Trans;
   *ReplaceTinyPivot = (int) o->ReplaceTinyPivot;
   *SolveInitialized = (int) o->SolveInitialized;
   *RefineInitialized = (int) o->RefineInitialized;
   *PrintStat = (int) o->PrintStat;
}

void f_set_superlu_options(fptr *opt, int *Fact, int *Equil, int *ParSymbFact,
                           int *ColPerm, int *RowPerm, int *IterRefine,
			   int *Trans, int *ReplaceTinyPivot,
			   int *SolveInitialized, int *RefineInitialized,
			   int *PrintStat)
{
    /* Store each integer argument into the matching field of the
       options struct behind handle opt, cast to the field's enum type. */
    superlu_dist_options_t *o = (superlu_dist_options_t *) *opt;

    o->Fact = (fact_t) *Fact;
    o->Equil = (yes_no_t) *Equil;
    o->ParSymbFact = (yes_no_t) *ParSymbFact;
    o->ColPerm = (colperm_t) *ColPerm;
    o->RowPerm = (rowperm_t) *RowPerm;
    o->IterRefine = (IterRefine_t) *IterRefine;
    o->Trans = (trans_t) *Trans;
    o->ReplaceTinyPivot = (yes_no_t) *ReplaceTinyPivot;
    o->SolveInitialized = (yes_no_t) *SolveInitialized;
    o->RefineInitialized = (yes_no_t) *RefineInitialized;
    o->PrintStat = (yes_no_t) *PrintStat;
}

/* wrappers for SuperLU functions */

void f_set_default_options(fptr *options)
{
   /* Forward the options struct behind the handle to
      set_default_options_dist(). */
   superlu_dist_options_t *o = (superlu_dist_options_t *) *options;

   set_default_options_dist(o);
}

void f_superlu_gridinit(int *Bcomm, int_t *nprow, int_t *npcol, fptr *grid)
{
   /* Convert the Fortran communicator, decode the grid handle, and
      forward everything to superlu_gridinit(). */
   MPI_Comm comm = f2c_comm(Bcomm);
   gridinfo_t *g = (gridinfo_t *) *grid;

   superlu_gridinit(comm, *nprow, *npcol, g);
}

void f_superlu_gridmap(int *Bcomm, int_t *nprow, int_t *npcol, 
                       int_t *usermap, int_t *ldumap,
	 fptr *grid)
{
   /* Convert the Fortran communicator, decode the grid handle, and
      forward everything (including the user-supplied process map and
      its leading dimension) to superlu_gridmap(). */
   MPI_Comm comm = f2c_comm(Bcomm);
   gridinfo_t *g = (gridinfo_t *) *grid;

   superlu_gridmap(comm, *nprow, *npcol, usermap, *ldumap, g);
}

void f_superlu_gridexit(fptr *grid)
{
   /* Forward the grid behind the handle to superlu_gridexit(). */
   gridinfo_t *g = (gridinfo_t *) *grid;

   superlu_gridexit(g);
}

void f_ScalePermstructInit(int_t *m, int_t *n, fptr *ScalePermstruct)
{
   /* Forward the dimensions and the struct behind the handle to
      ScalePermstructInit(). */
   ScalePermstruct_t *sp = (ScalePermstruct_t *) *ScalePermstruct;

   ScalePermstructInit(*m, *n, sp);
}

void f_ScalePermstructFree(fptr *ScalePermstruct)
{
   /* Forward the struct behind the handle to ScalePermstructFree(). */
   ScalePermstruct_t *sp = (ScalePermstruct_t *) *ScalePermstruct;

   ScalePermstructFree(sp);
}

void f_PStatInit(fptr *stat)
{
   /* Forward the statistics struct behind the handle to PStatInit(). */
   SuperLUStat_t *st = (SuperLUStat_t *) *stat;

   PStatInit(st);
}

void f_PStatFree(fptr *stat)
{
   /* Forward the statistics struct behind the handle to PStatFree(). */
   SuperLUStat_t *st = (SuperLUStat_t *) *stat;

   PStatFree(st);
}

/* Fortran-callable wrapper around LUstructInit().
 *
 *   m        (input) row count; accepted only to keep the Fortran
 *            interface unchanged, it is not used.
 *   n        (input) column count, forwarded to LUstructInit().
 *   LUstruct (input) handle of the LUstruct_t to initialize.
 *
 * LUstructInit() takes a single dimension.  The C example driver in
 * this distribution passes the column count n, and f_Destroy_LU() is
 * likewise called with n, so n (not m) is forwarded here.  For square
 * systems, where m == n, the result is the same as before.
 */
void f_LUstructInit(int_t *m, int_t *n, fptr *LUstruct)
{
   extern void LUstructInit(const int_t, LUstruct_t *);

   (void) m;   /* intentionally unused, see above */
   LUstructInit(*n, (LUstruct_t *) *LUstruct);
}

void f_LUstructFree(fptr *LUstruct)
{
   /* Local prototype, then forward the struct behind the handle to
      LUstructFree(). */
   extern void LUstructFree(LUstruct_t *);
   LUstruct_t *lu = (LUstruct_t *) *LUstruct;

   LUstructFree(lu);
}

void f_Destroy_LU(int_t *n, fptr *grid, fptr *LUstruct)
{
   /* Decode both handles and forward them, with the dimension n, to
      Destroy_LU(). */
   gridinfo_t *g = (gridinfo_t *) *grid;
   LUstruct_t *lu = (LUstruct_t *) *LUstruct;

   Destroy_LU(*n, g, lu);
}

void f_zCreate_CompRowLoc_Mat_dist(fptr *A, int_t *m, int_t *n, int_t *nnz_loc,
				   int_t *m_loc, int_t *fst_row, doublecomplex *nzval,
				   int_t *colind, int_t *rowptr, int *stype,
				   int *dtype, int *mtype)
{
   /* Decode the matrix handle and convert the integer type codes to
      their enum types, then forward everything to
      zCreate_CompRowLoc_Matrix_dist().  The nzval, colind and rowptr
      arrays are passed through as given. */
   SuperMatrix *mat = (SuperMatrix *) *A;
   Stype_t st = (Stype_t) *stype;
   Dtype_t dt = (Dtype_t) *dtype;
   Mtype_t mt = (Mtype_t) *mtype;

   zCreate_CompRowLoc_Matrix_dist(mat, *m, *n, *nnz_loc, *m_loc, *fst_row,
                                  nzval, colind, rowptr, st, dt, mt);
}

void f_Destroy_CompRowLoc_Mat_dist(fptr *A)
{
   /* Forward the matrix behind the handle to
      Destroy_CompRowLoc_Matrix_dist(). */
   SuperMatrix *mat = (SuperMatrix *) *A;

   Destroy_CompRowLoc_Matrix_dist(mat);
}

void f_Destroy_SuperMat_Store_dist(fptr *A)
{
   /* Forward the matrix behind the handle to
      Destroy_SuperMatrix_Store_dist(). */
   SuperMatrix *mat = (SuperMatrix *) *A;

   Destroy_SuperMatrix_Store_dist(mat);
}

void f_zSolveFinalize(fptr *options, fptr *SOLVEstruct)
{
   /* Decode both handles and forward them to zSolveFinalize(). */
   superlu_dist_options_t *o = (superlu_dist_options_t *) *options;
   SOLVEstruct_t *solve = (SOLVEstruct_t *) *SOLVEstruct;

   zSolveFinalize(o, solve);
}

void f_pzgssvx(fptr *options, fptr *A, fptr *ScalePermstruct, doublecomplex *B,
               int *ldb, int *nrhs, fptr *grid, fptr *LUstruct,
               fptr *SOLVEstruct, double *berr, fptr *stat, int *info)
{
    /* Decode every handle into its typed C pointer. */
    superlu_dist_options_t *o = (superlu_dist_options_t *) *options;
    SuperMatrix *mat = (SuperMatrix *) *A;
    ScalePermstruct_t *sp = (ScalePermstruct_t *) *ScalePermstruct;
    gridinfo_t *g = (gridinfo_t *) *grid;
    LUstruct_t *lu = (LUstruct_t *) *LUstruct;
    SOLVEstruct_t *solve = (SOLVEstruct_t *) *SOLVEstruct;
    SuperLUStat_t *st = (SuperLUStat_t *) *stat;

    /* Call the complex driver; B, berr and info are passed straight
       through, ldb and nrhs by value. */
    pzgssvx(o, mat, sp, B, *ldb, *nrhs, g, lu, solve, berr, st, info);

    /* The statistics are always handed to PStatPrint() afterwards. */
    PStatPrint(o, st, g);
}

/* Create the distributed matrix */

void f_zcreate_dist_matrix(fptr *A, int_t *m, int_t *n, int_t *nnz,
			   doublecomplex *nzval, int_t *rowind, int_t *colptr,
			   fptr *grid)
{
   /* Local prototype of the routine being wrapped. */
   int zcreate_dist_matrix(SuperMatrix *, int_t, int_t, int_t, doublecomplex *,
			   int_t * , int_t *, gridinfo_t *);

   /* Decode the handles, then forward the global matrix arrays and
      dimensions.  The int result of zcreate_dist_matrix() is not
      propagated to the caller. */
   SuperMatrix *mat = (SuperMatrix *) *A;
   gridinfo_t *g = (gridinfo_t *) *grid;

   zcreate_dist_matrix(mat, *m, *n, *nnz, nzval, rowind, colptr, g);

}

/* Check malloc */

void f_check_malloc(int *iam)
{
    /* Compiles to an empty function unless DEBUGlevel >= 1, in which
       case CHECK_MALLOC is invoked with the caller's rank. */
#if ( DEBUGlevel>=1 )
    int_t rank = (int_t) *iam;

    CHECK_MALLOC(rank, "Check Malloc");
#endif
}
SuperLU_DIST_5.3.0/FORTRAN/f_5x5.f900000644013363400111340000001552513233431301015225 0ustar  xiaoyessg
! -- Distributed SuperLU routine (version 2.0) --
! Lawrence Berkeley National Lab, Univ. of California Berkeley.
! July 20, 2004
!
!
      program f_5x5
!
! Purpose
! =======
!
! This example illustrates how to use F_PDGSSVX to solve a linear
! system, starting from the default options and overriding the column
! and row permutation options (see "Modify one or more options" below).
! The input matrix is the small 5x5 example that appears in the
! SuperLU Users' Guide, Section 2.2:
!
!   [ s     u  u    ]     [ 19      21  21    ]
!   [ l  u          ]     [ 12  21            ]
!   [    l  p       ]  =  [     12  16        ]
!   [          e  u ]     [             5  21 ]
!   [ l  l        r ]     [ 12  12         18 ]
!
! It is set up to use 2 processes (a 1 x 2 process grid):
!    process with iam == 0 contains the first 2 rows
!    the other process contains the last 3 rows
!
! Seven basic steps are required:
!   1. Create C structures used in SuperLU_DIST
!   2. Initialize the MPI environment and the SuperLU process grid
!   3. Set up the input matrix and the right-hand side
!   4. Set the options argument
!   5. Call f_pdgssvx
!   6. Release the process grid and terminate the MPI environment
!   7. Release all structures
!
      use superlu_mod
!      implicit none
      include 'mpif.h'
      integer maxn, maxnz, maxnrhs
      parameter ( maxn = 10, maxnz = 100, maxnrhs = 10 )
! Local part of the matrix in compressed row storage (0-based indices).
      integer colind(maxnz), rowptr(maxn+1)
      real*8  nzval(maxnz), b(maxn), berr(maxnrhs)
      integer n, m, nnz, nrhs, ldb, nprow, npcol, init
! ldb4 is an integer*4 copy of ldb; it is what gets passed to f_pdgssvx.
      integer*4 iam, info, i, ierr, ldb4
      integer nnz_loc, m_loc, fst_row
! Symbolic names for the nonzero values of the matrix shown above.
      real*8  s, u, p, e, r, l

! Handles (integers of kind superlu_ptr) to the C structures.
      integer(superlu_ptr) :: grid
      integer(superlu_ptr) :: options
      integer(superlu_ptr) :: ScalePermstruct
      integer(superlu_ptr) :: LUstruct
      integer(superlu_ptr) :: SOLVEstruct
      integer(superlu_ptr) :: A
      integer(superlu_ptr) :: stat

! Initialize MPI environment
      call mpi_init(ierr)

! Check malloc
!      call f_check_malloc(iam)

! Create Fortran handles for the C structures used in SuperLU_DIST
      call f_create_gridinfo_handle(grid)
      call f_create_options_handle(options)
      call f_create_ScalePerm_handle(ScalePermstruct)
      call f_create_LUstruct_handle(LUstruct)
      call f_create_SOLVEstruct_handle(SOLVEstruct)
      call f_create_SuperMatrix_handle(A)
      call f_create_SuperLUStat_handle(stat)

! Initialize the SuperLU_DIST process grid (1 process row, 2 columns)
      nprow = 1
      npcol = 2
      call f_superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, grid)

! Bail out if I do not belong in the grid: jump straight to the
! grid release and handle clean-up at label 100.
      call get_GridInfo(grid, iam=iam)
      if ( iam >= nprow * npcol ) then 
         go to 100
      endif
      if ( iam == 0 ) then 
         write(*,*) ' Process grid ', nprow, ' X ', npcol
         write(*,*) ' default integer size ', kind(0) 
      endif
!
!*************************************************************************
! Set up the input matrix A
!*************************************************************************
! The input matrix is the small 5x5 example from the SuperLU Users' Guide:
!
!   [ s     u  u    ]     [ 19      21  21    ]
!   [ l  u          ]     [ 12  21            ]
!   [    l  p       ]  =  [     12  16        ]
!   [          e  u ]     [             5  21 ]
!   [ l  l        r ]     [ 12  12         18 ]
!
! Process 0 holds the first 2 rows; the other process holds the last
! 3 rows.  Each process fills only its own local rows below.
!
      m = 5
      n = 5
      nnz = 12
      s = 19.0
      u = 21.0
      p = 16.0
      e = 5.0
      r = 18.0
      l = 12.0
!      
      if ( iam == 0 ) then
! Process 0 owns the first 2 rows of the matrix
! NOTE: 0-based indexing must be used for the C routines.
         nnz_loc   = 5
         m_loc     = 2
         fst_row   = 0         ! 0-based indexing
         nzval(1)  = s
         colind(1) = 0         ! 0-based indexing
         nzval(2)  = u
         colind(2) = 2
         nzval(3)  = u
         colind(3) = 3
         nzval(4)  = l
         colind(4) = 0
         nzval(5)  = u
         colind(5) = 1
         rowptr(1) = 0         ! 0-based indexing
         rowptr(2) = 3
         rowptr(3) = 5
      else
! The other process owns the last 3 rows of the matrix
         nnz_loc   = 7
         m_loc     = 3
         fst_row   = 2         ! 0-based indexing
         nzval(1)  = l
         colind(1) = 1
         nzval(2)  = p
         colind(2) = 2
         nzval(3)  = e
         colind(3) = 3
         nzval(4)  = u
         colind(4) = 4
         nzval(5)  = l
         colind(5) = 0
         nzval(6)  = l
         colind(6) = 1
         nzval(7)  = r
         colind(7) = 4
         rowptr(1) = 0         ! 0-based indexing
         rowptr(2) = 2
         rowptr(3) = 4
         rowptr(4) = 7
      endif

      if ( iam == 0 ) then 
         write(*,*) ' Matrix A was set up'
      endif

! Create the distributed compressed row matrix pointed to by the F90 handle A
      call f_dCreate_CompRowLoc_Mat_dist(A, m, n, nnz_loc, m_loc, fst_row, &
           nzval, colind, rowptr, SLU_NR_loc, SLU_D, SLU_GE)

! Setup the right hand side: all ones, one entry per local row
! (ldb is taken from the local row count of A).
      call get_CompRowLoc_Matrix(A, nrow_loc=ldb)
      do i = 1, ldb
         b(i) = 1.0
      enddo
      nrhs = 1
      ldb4 = ldb

! Set the default input options
      call f_set_default_options(options)

! Modify one or more options: natural column ordering, no row permutation
      call set_superlu_options(options,ColPerm=NATURAL)
      call set_superlu_options(options,RowPerm=NOROWPERM)

! Initialize ScalePermstruct and LUstruct
      call get_SuperMatrix(A,nrow=m,ncol=n)
      call f_ScalePermstructInit(m, n, ScalePermstruct)
      call f_LUstructInit(m, n, LUstruct)

! Initialize the statistics variables
      call f_PStatInit(stat)

! Call the linear equation solver
      call f_pdgssvx(options, A, ScalePermstruct, b, ldb4, nrhs, &
                     grid, LUstruct, SOLVEstruct, berr, stat, info)

! NOTE(review): only the process with iam == 1 prints the backward
! error; every other process takes the else branch and prints INFO,
! even when info == 0.
      if (info == 0 .and. iam == 1) then
         write (*,*) 'Backward error: ', (berr(i), i = 1, nrhs)
      else
         write(*,*) 'INFO from f_pdgssvx = ', info
      endif

! Deallocate the storage allocated by SuperLU_DIST.
! f_dSolveFinalize is called only if the options report that the
! solve structure was initialized.
      call f_PStatFree(stat)
      call f_Destroy_SuperMat_Store_dist(A)
      call f_ScalePermstructFree(ScalePermstruct)
      call f_Destroy_LU(n, grid, LUstruct)
      call f_LUstructFree(LUstruct)
      call get_superlu_options(options, SolveInitialized=init)
      if (init == YES) then
         call f_dSolveFinalize(options, SOLVEstruct)
      endif

! Release the SuperLU process grid
! (label 100 is also the target for processes outside the grid)
100   call f_superlu_gridexit(grid)

! Deallocate the C structures pointed to by the Fortran handles
      call f_destroy_gridinfo_handle(grid)
      call f_destroy_options_handle(options)
      call f_destroy_ScalePerm_handle(ScalePermstruct)
      call f_destroy_LUstruct_handle(LUstruct)
      call f_destroy_SOLVEstruct_handle(SOLVEstruct)
      call f_destroy_SuperMatrix_handle(A)
      call f_destroy_SuperLUStat_handle(stat)

! Check malloc
!      call f_check_malloc(iam)

! Terminate the MPI execution environment
      call mpi_finalize(ierr)

      stop
      end
SuperLU_DIST_5.3.0/FORTRAN/f_pddrive_ABglobal.f0000644013363400111340000000403713233431301017607 0ustar  xiaoyessg!
! -- Distributed SuperLU routine (version 2.0) --
! Lawrence Berkeley National Lab, Univ. of California Berkeley.
! July 10, 2003
!
!
      program f_pddrive_ABglobal
!
! Purpose
! =======
!
! Example driver that reads a Harwell-Boeing matrix with hbcode1,
! converts its index arrays to 0-based indexing, and then calls
! c_fortran_pdgssvx_ABglobal three times on a 2 x 2 process grid:
! once with iopt = 1 (LU factorization), once with iopt = 3
! (triangular solve with the existing factors), and once with
! iopt = 4 (free the storage behind the handles).
! The right-hand side is a vector of all ones.
!
! IMPLICIT NONE must precede every declaration, including the ones
! brought in by mpif.h, so it comes before the include line.
      implicit none
      include 'mpif.h'
      integer maxn, maxnz, maxnrhs
      parameter ( maxn = 10000, maxnz = 100000, maxnrhs = 10 )
! colptr holds n+1 column pointers, so it needs maxn+1 entries.
      integer rowind(maxnz), colptr(maxn+1)
      real*8  values(maxnz), b(maxn), berr(maxnrhs)
      integer n, nnz, nrhs, ldb, i, ierr, info, iopt
      integer nprow, npcol
! Integer arrays in which the C side stores its handles.
      integer factors_handle(8), grid_handle(8)
!
      call mpi_init(ierr)
!
!     Read Harwell-Boeing matrix
      call hbcode1(n, n, nnz, values, rowind, colptr)
!
!     Adjust to 0-based indexing which is required by the C routines.
      do i = 1, n+1
         colptr(i) = colptr(i) - 1
      end do
      do i = 1, nnz
         rowind(i) = rowind(i) - 1
      end do

!     One right-hand side, all ones.
      nrhs = 1
      ldb = n
      do i = 1, n
         b(i) = 1.0
      enddo
!
!     Create the process grid (iopt = 1 for c_fortran_slugrid)
      iopt = 1
      nprow = 2
      npcol = 2
      call c_fortran_slugrid(iopt, MPI_COMM_WORLD, nprow, npcol,
     $     grid_handle)
!
! Only performs LU factorization
!
      iopt = 1
      call c_fortran_pdgssvx_ABglobal(iopt, n, nnz, nrhs,
     $     values, rowind, colptr, b, ldb, grid_handle, berr,
     $     factors_handle, info)
!
! Now performs triangular solve with the existing factors
!
      iopt = 3
      call c_fortran_pdgssvx_ABglobal(iopt, n, nnz, nrhs,
     $     values, rowind, colptr, b, ldb, grid_handle, berr,
     $     factors_handle, info)
!
      if (info .eq. 0) then
         write (*,*) 'Backward error: ', (berr(i), i = 1, nrhs)
      else
         write(*,*) 'INFO from c_fortran_pdgssvx_ABglobal = ', info
      endif
!
! Now free the storage associated with the handles
!
      iopt = 4
      call c_fortran_pdgssvx_ABglobal(iopt, n, nnz, nrhs,
     $     values, rowind, colptr, b, ldb, grid_handle, berr,
     $     factors_handle, info)
!     Release the process grid (iopt = 2 for c_fortran_slugrid)
      iopt = 2
      call c_fortran_slugrid(iopt, MPI_COMM_WORLD, nprow, npcol,
     $     grid_handle)
!
      call mpi_finalize(ierr)
!
      stop
      end
SuperLU_DIST_5.3.0/FORTRAN/README0000644013363400111340000000220613233431301014627 0ustar  xiaoyessg		Fortran 90 Interface

This directory contains Fortran-90 wrapper routines for SuperLU_DIST.
The directory contains the following files:
    superlu_mod.f90    -  Fortran 90 module that defines the wrapper functions
                             to access SuperLU_DIST's data structures.
    superlupara.f90    -  It contains parameters that correspond to
                             SuperLU_DIST's enumerate constants.
    superlu_c2f_dwrap.c,
    superlu_c2f_zwrap.c -  All the C wrapper functions (real and complex
                             versions), callable from Fortran.
    dhbcode1.f90,
    zhbcode1.f90       -  Fortran routines to read a Harwell-Boeing matrix
                             (real and complex versions).

To compile the code, type 'make'

There are two examples in the directory.

1. f_5x5.f90:
   A small 5x5 example appeared in the SuperLU Users Guide, Section 2.2.
   To run the code on a Cray XT or XE, type 'aprun -n 2 f_5x5'
   (The example is set up to use 2 processors.)

2. f_pddrive.f90 / f_pzdrive.f90:
   An example Fortran driver routine that reads a matrix from a file
   'g20.rua' in Harwell-Boeing format.
   To run the code on a Cray XT or XE, type 'aprun -n 4 f_pddrive'
   (The example is set up to use 4 processors.)

   The complex version:
   % aprun -n 4 f_pzdrive
SuperLU_DIST_5.3.0/FORTRAN/superlupara.f900000644013363400111340000001010413233431301016626 0ustar  xiaoyessg!> @file
!! \brief This module contains some parameter used in SuperLU for
!! Fortran90 user.
!

module superlupara_mod

!----------------------------------------------------
! This module contains the parameters used by the SuperLU Fortran 90
! interface: the integer kind of a handle (superlu_ptr) and named
! integer constants mirroring SuperLU's C enumerations.
!----------------------------------------------------


implicit none
public superlu_ptr

!----------------------------------------------------
! Kind of integer used to hold a SuperLU pointer (handle).
! The 64-bit setting is active; the commented-out line below is the
! default-integer (32-bit) alternative.
! If changed, be sure to change the fptr typedef in the C wrapper
! files (superlu_c2f_dwrap.c / superlu_c2f_zwrap.c) too.
!
! integer, parameter :: superlu_ptr = kind(0) ! default integer size: 32-bit
integer, parameter :: superlu_ptr = 8 ! 64-bit

!----------------------------------------------------
! The following parameters are defined:

! These values come from superlu_defs.h.  If the values in there change with
! the version of SuperLU, then they need to be changed here, too.
! The trailing comment on the first constant of each group names the
! C enumeration type that the group mirrors.

integer, parameter, public :: &
                      NO                      = 0, & ! yes_no_t
                      YES                     = 1, &
                      DOFACT                  = 0, & ! fact_t
                      SamePattern             = 1, &
                      SamePattern_SameRowPerm = 2, &
                      FACTORED                = 3, &
                      NOROWPERM               = 0, & ! rowperm_t
                      LargeDiag               = 1, &
                      MY_PERMR                = 2, &
                      NATURAL                 = 0, & ! colperm_t
                      MMD_ATA                 = 1, &
                      MMD_AT_PLUS_A           = 2, &
                      COLAMD                  = 3, &
                      METIS_AT_PLUS_A         = 4, &
                      PARMETIS                = 5, &
                      ZOLTAN                  = 6, &
                      MY_PERMC                = 7, &
                      NOTRANS                 = 0, & ! trans_t
                      TRANS                   = 1, &
                      CONJ                    = 2, &
                      NOEQUIL                 = 0, & ! DiagScale_t  Need?
                      ROW                     = 1, &
                      COL                     = 2, &
                      BOTH                    = 3, &
                      NOREFINE                = 0, & ! IterRefine_t
                      SINGLE                  = 1, &
                      DOUBLE                  = 2, &
                      EXTRA                   = 3, &
                      LUSUP                   = 0, & ! MemType  Need?
                      UCOL                    = 1, &
                      LSUB                    = 2, &
                      USUB                    = 3, &
                      SYSTEM                  = 0, & ! LU_space_t  Need?
                      USER                    = 1
! Matrix storage type, data type and mathematical type codes.
integer, parameter, public :: &
                      SLU_NC                  = 0, & ! Stype_t
                      SLU_NCP                 = 1, &
                      SLU_NR                  = 2, &
                      SLU_SC                  = 3, &
                      SLU_SCP                 = 4, &
                      SLU_SR                  = 5, &
                      SLU_DN                  = 6, &
                      SLU_NR_loc              = 7, &
                      SLU_S                   = 0, & ! Dtype_t
                      SLU_D                   = 1, &
                      SLU_C                   = 2, &
                      SLU_Z                   = 3, &
                      SLU_GE                  = 0, & ! Mtype_t
                      SLU_TRLU                = 1, &
                      SLU_TRUU                = 2, &
                      SLU_TRL                 = 3, &
                      SLU_TRU                 = 4, &
                      SLU_SYL                 = 5, &
                      SLU_SYU                 = 6, &
                      SLU_HEL                 = 7, &
                      SLU_HEU                 = 8


!----------------------------------------------------

end module superlupara_mod
SuperLU_DIST_5.3.0/FORTRAN/dcreate_dist_matrix.c0000644013363400111340000001352113233431301020133 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/


/*! @file 
 * \brief Distribute the input matrix in a distributed compressed row format.
 *
 * 
 * -- Distributed SuperLU routine (version 3.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 2012
 *
 *
 * Purpose
 * =======
 * 
 * DCREATE_DIST_MATRIX reads the global matrix from three input arrays
 * and distributes it to the processes in a distributed compressed row format.
 *
 * Arguments   
 * =========      
 *
 * A             (output) SuperMatrix*
 *               Local matrix A in NR_loc format. 
 *
 * M             (input) int_t
 *               The number of rows of the global matrix. 
 *
 * N             (input) int_t
 *               The number of columns of the global matrix. 
 *
 * NNZ           (input) int_t
 *               The number of nonzeros in the global matrix. 
 *
 * NZVAL_G       (input) double*
 *               Nonzero values of the global matrix. 
 *
 * ROWIND_G      (input) int_t*
 *               Row indices of the global matrix. 
 *
 * COLPTR_G      (input) int_t*
 *               Column pointers of the global matrix. 
 *
 * GRID          (input) gridinfo_t*
 *               The 2D process mesh.
 *
 * 
*/ #include #include "superlu_ddefs.h" int dcreate_dist_matrix(SuperMatrix *A, int_t m, int_t n, int_t nnz, double *nzval_g, int_t *rowind_g, int_t *colptr_g, gridinfo_t *grid) { SuperMatrix GA; /* global A */ int_t *rowind, *colptr; /* global */ double *nzval; /* global */ double *nzval_loc; /* local */ int_t *colind, *rowptr; /* local */ int_t m_loc, fst_row, nnz_loc; int_t m_loc_fst; /* Record m_loc of the first p-1 processors, when mod(m, p) is not zero. */ int_t iam, row, col, i, j, relpos; char trans[1]; int_t *marker; iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter dcreate_dist_matrix()"); #endif if ( !iam ) { /* Allocate storage for compressed column representation. */ dallocateA_dist(n, nnz, &nzval, &rowind, &colptr); /* Copy the global matrix. */ #if 0 /* and ADJUST to 0-based indexing which is required by the C routines.*/ #endif for(i=0; icomm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( nzval, nnz, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); /* Allocate storage for compressed column representation. 
*/ dallocateA_dist(n, nnz, &nzval, &rowind, &colptr); MPI_Bcast( nzval, nnz, MPI_DOUBLE, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } #if 0 nzval[0]=0.1; #endif /* Compute the number of rows to be distributed to local process */ m_loc = m / (grid->nprow * grid->npcol); m_loc_fst = m_loc; /* When m / procs is not an integer */ if ((m_loc * grid->nprow * grid->npcol) != m) { m_loc = m_loc+1; m_loc_fst = m_loc; if (iam == (grid->nprow * grid->npcol - 1)) m_loc = m - m_loc_fst * (grid->nprow * grid->npcol - 1); } /* Create compressed column matrix for GA. */ dCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr, SLU_NC, SLU_D, SLU_GE); /************************************************* * Change GA to a local A with NR_loc format * *************************************************/ rowptr = (int_t *) intMalloc_dist(m_loc+1); marker = (int_t *) intCalloc_dist(n); /* Get counts of each row of GA */ for (i = 0; i < n; ++i) for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]]; /* Set up row pointers */ rowptr[0] = 0; fst_row = iam * m_loc_fst; nnz_loc = 0; for (j = 0; j < m_loc; ++j) { row = fst_row + j; rowptr[j+1] = rowptr[j] + marker[row]; marker[j] = rowptr[j]; } nnz_loc = rowptr[m_loc]; nzval_loc = (double *) doubleMalloc_dist(nnz_loc); colind = (int_t *) intMalloc_dist(nnz_loc); /* Transfer the matrix into the compressed row storage */ for (i = 0; i < n; ++i) { for (j = colptr[i]; j < colptr[i+1]; ++j) { row = rowind[j]; if ( (row>=fst_row) && (row=1 ) if ( !iam ) dPrint_CompCol_Matrix_dist(&GA); #endif /* Destroy GA */ Destroy_CompCol_Matrix_dist(&GA); /******************************************************/ /* Change GA to a local A with NR_loc format */ /******************************************************/ /* Set up the local A in NR_loc format */ dCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, nzval_loc, colind, rowptr, SLU_NR_loc, SLU_D, SLU_GE); 
SUPERLU_FREE(marker); #if ( DEBUGlevel>=1 ) printf("sizeof(NRforamt_loc) %d\n", sizeof(NRformat_loc)); CHECK_MALLOC(iam, "Exit dcreate_dist_matrix()"); #endif return 0; } SuperLU_DIST_5.3.0/FORTRAN/zcreate_dist_matrix.c0000644013363400111340000001363213233431301020164 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Distribute the input matrix in a distributed compressed row format. * *
 * -- Distributed SuperLU routine (version 3.2) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 2012
 *
 *
 * Purpose
 * =======
 * 
 * ZCREATE_DIST_MATRIX reads the global matrix from three input arrays
 * and distributes it to the processes in a distributed compressed row format.
 *
 * Arguments   
 * =========      
 *
 * A             (output) SuperMatrix*
 *               Local matrix A in NR_loc format. 
 *
 * M             (input) int_t
 *               The number of rows of the global matrix. 
 *
 * N             (input) int_t
 *               The number of columns of the global matrix. 
 *
 * NNZ           (input) int_t
 *               The number of nonzeros in the global matrix. 
 *
 * NZVAL_G       (input) doublecomplex*
 *               Nonzero values of the global matrix. 
 *
 * ROWIND_G      (input) int_t*
 *               Row indices of the global matrix. 
 *
 * COLPTR_G      (input) int_t*
 *               Column pointers of the global matrix. 
 *
 * GRID          (input) gridinfo_t*
 *               The 2D process mesh.
 *
 * 
*/ #include #include "superlu_zdefs.h" int zcreate_dist_matrix(SuperMatrix *A, int_t m, int_t n, int_t nnz, doublecomplex *nzval_g, int_t *rowind_g, int_t *colptr_g, gridinfo_t *grid) { SuperMatrix GA; /* global A */ int_t *rowind, *colptr; /* global */ doublecomplex *nzval; /* global */ doublecomplex *nzval_loc; /* local */ int_t *colind, *rowptr; /* local */ int_t m_loc, fst_row, nnz_loc; int_t m_loc_fst; /* Record m_loc of the first p-1 processors, when mod(m, p) is not zero. */ int_t iam, row, col, i, j, relpos; char trans[1]; int_t *marker; iam = grid->iam; #if ( DEBUGlevel>=1 ) CHECK_MALLOC(iam, "Enter zcreate_dist_matrix()"); #endif if ( !iam ) { /* Allocate storage for compressed column representation. */ zallocateA_dist(n, nnz, &nzval, &rowind, &colptr); /* Copy the global matrix. */ #if 0 /* and ADJUST to 0-based indexing which is required by the C routines.*/ #endif for(i=0; icomm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( nzval, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } else { /* Receive matrix A from PE 0. */ MPI_Bcast( &m, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &n, 1, mpi_int_t, 0, grid->comm ); MPI_Bcast( &nnz, 1, mpi_int_t, 0, grid->comm ); /* Allocate storage for compressed column representation. 
*/ zallocateA_dist(n, nnz, &nzval, &rowind, &colptr); MPI_Bcast( nzval, nnz, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid->comm ); MPI_Bcast( rowind, nnz, mpi_int_t, 0, grid->comm ); MPI_Bcast( colptr, n+1, mpi_int_t, 0, grid->comm ); } #if 0 nzval[0]=0.1; #endif /* Compute the number of rows to be distributed to local process */ m_loc = m / (grid->nprow * grid->npcol); m_loc_fst = m_loc; /* When m / procs is not an integer */ if ((m_loc * grid->nprow * grid->npcol) != m) { m_loc = m_loc+1; m_loc_fst = m_loc; if (iam == (grid->nprow * grid->npcol - 1)) m_loc = m - m_loc_fst * (grid->nprow * grid->npcol - 1); } /* Create compressed column matrix for GA. */ zCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr, SLU_NC, SLU_Z, SLU_GE); /************************************************* * Change GA to a local A with NR_loc format * *************************************************/ rowptr = (int_t *) intMalloc_dist(m_loc+1); marker = (int_t *) intCalloc_dist(n); /* Get counts of each row of GA */ for (i = 0; i < n; ++i) for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]]; /* Set up row pointers */ rowptr[0] = 0; fst_row = iam * m_loc_fst; nnz_loc = 0; for (j = 0; j < m_loc; ++j) { row = fst_row + j; rowptr[j+1] = rowptr[j] + marker[row]; marker[j] = rowptr[j]; } nnz_loc = rowptr[m_loc]; nzval_loc = (doublecomplex *) doublecomplexMalloc_dist(nnz_loc); colind = (int_t *) intMalloc_dist(nnz_loc); /* Transfer the matrix into the compressed row storage */ for (i = 0; i < n; ++i) { for (j = colptr[i]; j < colptr[i+1]; ++j) { row = rowind[j]; if ( (row>=fst_row) && (row=1 ) if ( !iam ) dPrint_CompCol_Matrix_dist(&GA); #endif /* Destroy GA */ Destroy_CompCol_Matrix_dist(&GA); /******************************************************/ /* Change GA to a local A with NR_loc format */ /******************************************************/ /* Set up the local A in NR_loc format */ zCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, nzval_loc, colind, 
rowptr, SLU_NR_loc, SLU_Z, SLU_GE); SUPERLU_FREE(marker); #if ( DEBUGlevel>=1 ) printf("sizeof(NRforamt_loc) %d\n", sizeof(NRformat_loc)); CHECK_MALLOC(iam, "Exit dcreate_dist_matrix()"); #endif return 0; } SuperLU_DIST_5.3.0/FORTRAN/zhbcode1.f900000644013363400111340000000271513233431301015773 0ustar xiaoyessg!> @file !! \brief Fortran code for reading a sparse matrix in Harwell-Boeing format. !! ! subroutine zhbcode1(nrow, ncol, nnzero, values, rowind, colptr) ! ================================================================ ! ... SAMPLE CODE FOR READING A SPARSE MATRIX IN STANDARD FORMAT ! ================================================================ CHARACTER TITLE*72, KEY*8, MXTYPE*3, PTRFMT*16, & INDFMT*16, VALFMT*20, RHSFMT*20 INTEGER TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD, NROW, & NCOL , NNZERO, NELTVL INTEGER COLPTR (*), ROWIND (*) double complex VALUES (*) ! ------------------------ ! ... READ IN HEADER BLOCK ! ------------------------ READ ( 5, 1000 ) TITLE , KEY , TOTCRD, PTRCRD, INDCRD, VALCRD, & RHSCRD, MXTYPE, NROW , NCOL , NNZERO, NELTVL, & PTRFMT, INDFMT, VALFMT, RHSFMT 1000 FORMAT ( A72, A8 / 5I14 / A3, 11X, 4I14 / 2A16, 2A20 ) ! ------------------------- ! ... READ MATRIX STRUCTURE ! ------------------------- READ ( 5, PTRFMT ) ( COLPTR (I), I = 1, NCOL+1 ) READ ( 5, INDFMT ) ( ROWIND (I), I = 1, NNZERO ) IF ( VALCRD .GT. 0 ) THEN ! ---------------------- ! ... READ MATRIX VALUES ! ---------------------- READ ( 5, VALFMT ) ( VALUES (I), I = 1, NNZERO ) ENDIF return end SuperLU_DIST_5.3.0/FORTRAN/dhbcode1.f900000644013363400111340000000270513233431301015744 0ustar xiaoyessg!> @file !! \brief Fortran code for reading a sparse matrix in Harwell-Boeing format. !! ! subroutine dhbcode1(nrow, ncol, nnzero, values, rowind, colptr) ! ================================================================ ! ... SAMPLE CODE FOR READING A SPARSE MATRIX IN STANDARD FORMAT ! 
================================================================ CHARACTER TITLE*72, KEY*8, MXTYPE*3, PTRFMT*16, & INDFMT*16, VALFMT*20, RHSFMT*20 INTEGER TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD, NROW, & NCOL , NNZERO, NELTVL INTEGER COLPTR (*), ROWIND (*) REAL*8 VALUES (*) ! ------------------------ ! ... READ IN HEADER BLOCK ! ------------------------ READ ( 5, 1000 ) TITLE , KEY , TOTCRD, PTRCRD, INDCRD, VALCRD, & RHSCRD, MXTYPE, NROW , NCOL , NNZERO, NELTVL, & PTRFMT, INDFMT, VALFMT, RHSFMT 1000 FORMAT ( A72, A8 / 5I14 / A3, 11X, 4I14 / 2A16, 2A20 ) ! ------------------------- ! ... READ MATRIX STRUCTURE ! ------------------------- READ ( 5, PTRFMT ) ( COLPTR (I), I = 1, NCOL+1 ) READ ( 5, INDFMT ) ( ROWIND (I), I = 1, NNZERO ) IF ( VALCRD .GT. 0 ) THEN ! ---------------------- ! ... READ MATRIX VALUES ! ---------------------- READ ( 5, VALFMT ) ( VALUES (I), I = 1, NNZERO ) ENDIF return end SuperLU_DIST_5.3.0/FORTRAN/f_pzdrive.f900000644013363400111340000001144213233431301016261 0ustar xiaoyessg !> @file !! \brief The driver program to solve a linear system with default options. !! !!
!! -- Distributed SuperLU routine (version 3.2) --
!! Lawrence Berkeley National Lab, Univ. of California Berkeley.
!! October, 2012
!! 
! program f_pzdrive ! ! Purpose ! ======= ! ! The driver program F_PDDRIVE. ! ! This example illustrates how to use F_PDGSSVX with the full ! (default) options to solve a linear system. ! ! Seven basic steps are required: ! 1. Create C structures used in SuperLU_DIST ! 2. Initialize the MPI environment and the SuperLU process grid ! 3. Set up the input matrix and the right-hand side ! 4. Set the options argument ! 5. Call f_pdgssvx ! 6. Release the process grid and terminate the MPI environment ! 7. Release all structures ! ! use superlu_mod ! implicit none include 'mpif.h' integer maxn, maxnz, maxnrhs parameter ( maxn = 10000, maxnz = 100000, maxnrhs = 10 ) integer rowind(maxnz), colptr(maxn) double complex values(maxnz), b(maxn), berr(maxnrhs) integer n, m, nnz, nprow, npcol, ldb, init integer*4 iam, info, i, ierr, ldb4, nrhs integer(superlu_ptr) :: grid integer(superlu_ptr) :: options integer(superlu_ptr) :: ScalePermstruct integer(superlu_ptr) :: LUstruct integer(superlu_ptr) :: SOLVEstruct integer(superlu_ptr) :: A integer(superlu_ptr) :: stat ! Initialize MPI environment call mpi_init(ierr) ! Check malloc ! call f_check_malloc(iam) ! Create Fortran handles for the C structures used in SuperLU_DIST call f_create_gridinfo_handle(grid) call f_create_options_handle(options) call f_create_ScalePerm_handle(ScalePermstruct) call f_create_LUstruct_handle(LUstruct) call f_create_SOLVEstruct_handle(SOLVEstruct) call f_create_SuperMatrix_handle(A) call f_create_SuperLUStat_handle(stat) ! Initialize the SuperLU_DIST process grid nprow = 2 npcol = 2 call f_superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, grid) ! Bail out if I do not belong in the grid. call get_GridInfo(grid, iam=iam) if ( iam >= nprow * npcol ) then go to 100 endif if ( iam == 0 ) then write(*,*) ' Process grid ', nprow, ' X ', npcol endif ! Read Harwell-Boeing matrix, and adjust the pointers and indices ! to 0-based indexing, as required by C routines. 
if ( iam == 0 ) then open(file = "../EXAMPLE/cg20.cua", status = "old", unit = 5) call zhbcode1(m, n, nnz, values, rowind, colptr) close(unit = 5) ! do i = 1, n+1 colptr(i) = colptr(i) - 1 enddo do i = 1, nnz rowind(i) = rowind(i) - 1 enddo endif ! Distribute the matrix to the process gird call f_zcreate_dist_matrix(A, m, n, nnz, values, rowind, colptr, grid) ! Setup the right hand side call get_CompRowLoc_Matrix(A, nrow_loc=ldb) do i = 1, ldb b(i) = 1.0 enddo nrhs = 1 ldb4 = ldb ! Set the default input options call f_set_default_options(options) ! Change one or more options ! call set_superlu_options(options,Fact=FACTORED) ! call set_superlu_options(options,ParSymbFact=YES) ! Initialize ScalePermstruct and LUstruct call get_SuperMatrix(A, nrow=m, ncol=n) call f_ScalePermstructInit(m, n, ScalePermstruct) call f_LUstructInit(m, n, LUstruct) ! Initialize the statistics variables call f_PStatInit(stat) ! Call the linear equation solver call f_pzgssvx(options, A, ScalePermstruct, b, ldb4, nrhs, & grid, LUstruct, SOLVEstruct, berr, stat, info) if (info == 0) then write (*,*) 'Backward error: ', (berr(i), i = 1, nrhs) else write(*,*) 'INFO from f_pdgssvx = ', info endif ! Deallocate the storage allocated by SuperLU_DIST call f_PStatFree(stat) call f_Destroy_CompRowLoc_Mat_dist(A) call f_ScalePermstructFree(ScalePermstruct) call f_Destroy_LU(n, grid, LUstruct) call f_LUstructFree(LUstruct) call get_superlu_options(options, SolveInitialized=init) if (init == YES) then call f_zSolveFinalize(options, SOLVEstruct) endif ! Release the SuperLU process grid 100 call f_superlu_gridexit(grid) ! Deallocate the C structures pointed to by the Fortran handles call f_destroy_gridinfo_handle(grid) call f_destroy_options_handle(options) call f_destroy_ScalePerm_handle(ScalePermstruct) call f_destroy_LUstruct_handle(LUstruct) call f_destroy_SOLVEstruct_handle(SOLVEstruct) call f_destroy_SuperMatrix_handle(A) call f_destroy_SuperLUStat_handle(stat) ! Check malloc ! 
call f_check_malloc(iam) ! Terminate the MPI execution environment call mpi_finalize(ierr) stop end SuperLU_DIST_5.3.0/FORTRAN/hbcode1.f.bak0000644013363400111340000000261313233431301016161 0ustar xiaoyessg subroutine hbcode1(nrow, ncol, nnzero, values, rowind, colptr) C ================================================================ C ... SAMPLE CODE FOR READING A SPARSE MATRIX IN STANDARD FORMAT C ================================================================ CHARACTER TITLE*72 , KEY*8 , MXTYPE*3 , 1 PTRFMT*16, INDFMT*16, VALFMT*20, RHSFMT*20 INTEGER TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD, 1 NROW , NCOL , NNZERO, NELTVL INTEGER COLPTR (*), ROWIND (*) REAL*8 VALUES (*) C ------------------------ C ... READ IN HEADER BLOCK C ------------------------ READ ( *, 1000 ) TITLE , KEY , 1 TOTCRD, PTRCRD, INDCRD, VALCRD, RHSCRD, 2 MXTYPE, NROW , NCOL , NNZERO, NELTVL, 3 PTRFMT, INDFMT, VALFMT, RHSFMT 1000 FORMAT ( A72, A8 / 5I14 / A3, 11X, 4I14 / 2A16, 2A20 ) C ------------------------- C ... READ MATRIX STRUCTURE C ------------------------- READ ( *, PTRFMT ) ( COLPTR (I), I = 1, NCOL+1 ) READ ( *, INDFMT ) ( ROWIND (I), I = 1, NNZERO ) IF ( VALCRD .GT. 0 ) THEN C ---------------------- C ... READ MATRIX VALUES C ---------------------- READ ( *, VALFMT ) ( VALUES (I), I = 1, NNZERO ) ENDIF return end SuperLU_DIST_5.3.0/FORTRAN/superlu_mod.f900000644013363400111340000001333513233431301016632 0ustar xiaoyessg!> @file !! \brief This module contains Fortran-side wrappers for the SuperLU !! get/set functions. ! module superlu_mod !---------------------------------------------------- ! This module contains Fortran-side wrappers for the SuperLU get/set ! functions, with optional arguments so the user doesn't have to provide ! the full set of components. 
!----------------------------------------------------

use superlupara_mod

implicit none

contains

subroutine get_GridInfo(grid, iam, nprow, npcol)
  ! Fetch process-grid fields; only the optional arguments that are
  ! present are filled in.
  integer(superlu_ptr) :: grid
  integer*4, optional :: iam
  integer, optional :: nprow, npcol
  integer :: l_iam, l_nprow, l_npcol

  call f_get_gridinfo(grid, l_iam, l_nprow, l_npcol)
  if (present(iam)) iam = l_iam
  if (present(nprow)) nprow = l_nprow
  if (present(npcol)) npcol = l_npcol
end subroutine get_GridInfo

subroutine get_SuperMatrix(A, nrow, ncol)
  ! Fetch the dimensions stored in the SuperMatrix handle A.
  integer(superlu_ptr) :: A
  integer, optional :: nrow, ncol
  integer :: l_nrow, l_ncol

  call f_get_SuperMatrix(A, l_nrow, l_ncol)
  if (present(nrow)) nrow = l_nrow
  if (present(ncol)) ncol = l_ncol
end subroutine get_SuperMatrix

subroutine set_SuperMatrix(A, nrow, ncol)
  ! Update selected dimensions of A: read the current values, override
  ! the ones supplied, write the full set back.
  integer(superlu_ptr) :: A
  integer, optional :: nrow, ncol
  integer :: l_nrow, l_ncol

  call f_get_SuperMatrix(A, l_nrow, l_ncol)
  if (present(nrow)) l_nrow = nrow
  if (present(ncol)) l_ncol = ncol
  call f_set_SuperMatrix(A, l_nrow, l_ncol)
end subroutine set_SuperMatrix

subroutine get_CompRowLoc_Matrix(A, nrow, ncol, nnz_loc, nrow_loc, fst_row)
  ! Fetch fields of the local compressed-row matrix behind handle A;
  ! only the optional arguments that are present are filled in.
  integer(superlu_ptr) :: A
  integer, optional :: nrow, ncol, nnz_loc, nrow_loc, fst_row
  integer :: l_nrow, l_ncol, l_nnz_loc, l_nrow_loc, l_fst_row

  call f_get_CompRowLoc_Matrix(A, l_nrow, l_ncol, l_nnz_loc, l_nrow_loc, &
                               l_fst_row)
  if (present(nrow)) nrow = l_nrow
  if (present(ncol)) ncol = l_ncol
  if (present(nnz_loc)) nnz_loc = l_nnz_loc
  if (present(nrow_loc)) nrow_loc = l_nrow_loc
  if (present(fst_row)) fst_row = l_fst_row
end subroutine get_CompRowLoc_Matrix

subroutine set_CompRowLoc_Matrix(A, nrow, ncol, nnz_loc, nrow_loc, fst_row)
  ! Update selected fields of the local compressed-row matrix behind
  ! handle A.  Fields whose optional argument is absent keep their current
  ! value: the current values are fetched first, the supplied ones are
  ! overridden, and only then is the full set written back (the same
  ! get / override / set sequence as set_SuperMatrix).
  integer(superlu_ptr) :: A
  integer, optional :: nrow, ncol, nnz_loc, nrow_loc, fst_row
  integer :: l_nrow, l_ncol, l_nnz_loc, l_nrow_loc, l_fst_row

  call f_get_CompRowLoc_Matrix(A, l_nrow, l_ncol, l_nnz_loc, l_nrow_loc, &
                               l_fst_row)
  if (present(nrow)) l_nrow = nrow
  if (present(ncol)) l_ncol = ncol
  if (present(nnz_loc)) l_nnz_loc = nnz_loc
  if (present(nrow_loc)) l_nrow_loc = nrow_loc
  if (present(fst_row)) l_fst_row = fst_row
  call f_set_CompRowLoc_Matrix(A, l_nrow, l_ncol, l_nnz_loc, l_nrow_loc, &
                               l_fst_row)
end subroutine set_CompRowLoc_Matrix

subroutine get_superlu_options(opt, Fact, Equil, ParSymbFact, ColPerm, &
                               RowPerm, IterRefine, Trans, ReplaceTinyPivot, SolveInitialized, &
                               RefineInitialized, PrintStat)
  ! Fetch option fields from the options handle opt; only the optional
  ! arguments that are present are filled in.
  integer(superlu_ptr) :: opt
  integer, optional :: Fact, Equil, ParSymbFact, ColPerm, RowPerm, &
                       IterRefine, Trans, ReplaceTinyPivot, SolveInitialized, &
                       RefineInitialized, PrintStat
!
  integer :: l_Fact, l_Equil, l_ParSymbFact, l_ColPerm, l_RowPerm, &
             l_IterRefine, l_Trans, l_ReplaceTinyPivot, l_SolveInitialized, &
             l_RefineInitialized, l_PrintStat

  call f_get_superlu_options(opt, l_Fact, l_Equil, l_ParSymbFact, l_ColPerm, &
                             l_RowPerm, l_IterRefine, l_Trans, &
                             l_ReplaceTinyPivot, l_SolveInitialized, &
                             l_RefineInitialized, l_PrintStat)
  if (present(Fact)) Fact = l_Fact
  if (present(Equil)) Equil = l_Equil
  if (present(ParSymbFact)) ParSymbFact = l_ParSymbFact
  if (present(ColPerm)) ColPerm = l_ColPerm
  if (present(RowPerm)) RowPerm = l_RowPerm
  if (present(IterRefine)) IterRefine = l_IterRefine
  if (present(Trans)) Trans = l_Trans
  if (present(ReplaceTinyPivot)) ReplaceTinyPivot = l_ReplaceTinyPivot
  if (present(SolveInitialized)) SolveInitialized = l_SolveInitialized
  if (present(RefineInitialized)) RefineInitialized = l_RefineInitialized
  if (present(PrintStat)) PrintStat = l_PrintStat
end subroutine get_superlu_options

subroutine set_superlu_options(opt, Fact, Equil, ParSymbFact, ColPerm, &
                               RowPerm, IterRefine, Trans, ReplaceTinyPivot, SolveInitialized, &
                               RefineInitialized, PrintStat)
  integer(superlu_ptr) :: opt
  integer, optional :: Fact, Equil, ParSymbFact, ColPerm, RowPerm, &
                       IterRefine, Trans, ReplaceTinyPivot, SolveInitialized, &
                       RefineInitialized, PrintStat
!
integer :: l_Fact, l_Equil, l_ParSymbFact, l_ColPerm, l_RowPerm, & l_IterRefine, l_Trans, l_ReplaceTinyPivot, l_SolveInitialized, & l_RefineInitialized, l_PrintStat call f_get_superlu_options(opt, l_Fact, l_Equil, l_ParSymbFact, l_ColPerm, & l_RowPerm, l_IterRefine, l_Trans, & l_ReplaceTinyPivot, l_SolveInitialized, & l_RefineInitialized, l_PrintStat) if (present(Fact)) l_Fact = Fact if (present(Equil)) l_Equil = Equil if (present(ParSymbFact)) l_ParSymbFact = ParSymbFact if (present(ColPerm)) l_ColPerm = ColPerm if (present(RowPerm)) l_RowPerm = RowPerm if (present(IterRefine)) l_IterRefine = IterRefine if (present(Trans)) l_Trans = Trans if (present(ReplaceTinyPivot)) l_ReplaceTinyPivot = ReplaceTinyPivot if (present(SolveInitialized)) l_SolveInitialized = SolveInitialized if (present(RefineInitialized)) l_RefineInitialized = RefineInitialized if (present(PrintStat)) l_PrintStat = PrintStat call f_set_superlu_options(opt, l_Fact, l_Equil, l_ParSymbFact, & l_ColPerm, l_RowPerm, l_IterRefine, l_Trans, & l_ReplaceTinyPivot, l_SolveInitialized, & l_RefineInitialized, l_PrintStat) end subroutine set_superlu_options end module superlu_mod SuperLU_DIST_5.3.0/FORTRAN/f_pddrive_old.f900000644013363400111340000000765013233431301017077 0ustar xiaoyessg! ! -- Distributed SuperLU routine (version 2.0) -- ! Lawrence Berkeley National Lab, Univ. of California Berkeley. ! July 29, 2003 ! ! program f_pddrive use superlu_mod include 'mpif.h' implicit none integer maxn, maxnz, maxnrhs parameter ( maxn = 10000, maxnz = 100000, maxnrhs = 10 ) integer rowind(maxnz), colptr(maxn) real*8 values(maxnz), b(maxn), berr(maxnrhs) integer n, m, nnz, nrhs, ldb, i, ierr, info, iam, m_loc, nnz_loc, fst_row integer nprow, npcol integer init integer(superlu_ptr) :: grid integer(superlu_ptr) :: options integer(superlu_ptr) :: ScalePermstruct integer(superlu_ptr) :: LUstruct integer(superlu_ptr) :: SOLVEstruct integer(superlu_ptr) :: A integer(superlu_ptr) :: stat ! Default process rows nprow = 1 ! 
Default process columns npcol = 1 ! Number of right-hand side nrhs = 1 ! INITIALIZE MPI ENVIRONMENT call mpi_init(ierr) ! Check Malloc call f_check_malloc(iam) ! create C structures used in superlu call f_create_gridinfo(grid) call f_create_options(options) call f_create_ScalePermstruct(ScalePermstruct) call f_create_LUstruct(LUstruct) call f_create_SOLVEstruct(SOLVEstruct) call f_create_SuperMatrix(A) ! initialize the SuperLU process grid nprow = 2 npcol = 2 call f_superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, grid) ! Bail out if I do not belong in the grid. call get_GridInfo(grid, iam=iam) if ( iam >= nprow * npcol ) then go to 100 endif if ( iam == 0 ) then write(*,*) ' Process grid ', nprow, ' X ', npcol endif ! Read Harwell-Boeing matrix if ( iam == 0 ) then call hbcode1(m, n, nnz, values, rowind, colptr) endif ! Distribute the matrix to the gird call f_dcreate_matrix_dis(A, m, n, nnz, values, rowind, colptr, grid) ! Get m_loc call get_CompRowLoc_Matrix(A, nrow_loc=m_loc); ! Setup the right hand side nrhs = 1 ldb = m_loc do i = 1, ldb b(i) = 1.0 enddo ! set the default input options call f_set_default_options(options) ! set one or more option ! call set_superlu_options(options,Fact=FACTORED) ! initialize ScalePermstruct and LUstruct ! get the m and n call get_SuperMatrix(A,nrow=m,ncol=n) call f_ScalePermstructInit(m, n, ScalePermstruct) call f_LUstructInit(m, n, LUstruct) ! initialize the statistics variables call f_create_SuperLUStat(stat) call f_PStatInit(stat) ! call the linear equation solver call f_pdgssvx(options, A, & ScalePermstruct, b, & ldb, nrhs, grid, & LUstruct, SOLVEstruct, berr, & stat, info) if (info == 0) then write (*,*) 'Backward error: ', (berr(i), i = 1, nrhs) else write(*,*) 'INFO from f_pdgssvx = ', info endif ! free memory call f_PStatFree(stat) call f_destroy_SuperLUStat(stat) ! deallocate SuperLU allocated storage call f_Destroy_CompRowLoc_Matrix_dis(A) call f_ScalePermstructFree(ScalePermstruct) ! 
call get_SuperMatrix(A,ncol=n) call f_Destroy_LU(n, grid, LUstruct) call f_LUstructFree(LUstruct) call get_superlu_options(options, SolveInitialized=init) if (init == YES) then call f_dSolveFinalize(options, SOLVEstruct) endif ! release the SuperLU process grid 100 call f_superlu_gridexit(grid) ! destroy C structures in superlu_matrix_type call f_destroy_gridinfo(grid) call f_destroy_options(options) call f_destroy_ScalePermstruct(ScalePermstruct) call f_destroy_LUstruct(LUstruct) call f_destroy_SOLVEstruct(SOLVEstruct) call f_destroy_SuperMatrix(A) ! TERMINATES THE MPI EXECUTION ENVIRONMENT call mpi_finalize(ierr) ! ! Check Malloc call f_check_malloc(iam) stop end SuperLU_DIST_5.3.0/FORTRAN/c_fortran_pdgssvx_ABglobal.c0000644013363400111340000001447713233431301021406 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /* * -- Distributed SuperLU routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley. * July 10, 2003 */ #include "superlu_ddefs.h" #define HANDLE_SIZE 8 typedef struct { ScalePermstruct_t *ScalePermstruct; LUstruct_t *LUstruct; } factors_dist_t; int c_fortran_pdgssvx_ABglobal_(int *iopt, int_t *n, int_t *nnz, int *nrhs, double *values, int_t *rowind, int_t *colptr, double *b, int *ldb, int grid_handle[HANDLE_SIZE], double *berr, int factors[HANDLE_SIZE], int *info) { /* * Purpose * ======= * * This is a Fortran wrapper to use pdgssvx_ABglobal(). 
* * Arguments * ========= * * iopt (input) int * Specifies the operation to be performed: * = 1, performs LU decomposition for the first time * = 2, performs a subsequent LU decomposition for a new matrix * with the same sparsity pattern * = 3, performs triangular solve * = 4, frees all the storage in the end * * n (input) int, order of the matrix A * * nnz (input) int, number of nonzeros in matrix A * * nrhs (input) int, number of right-hand sides in the system AX = B * * values/rowind/colptr (input) column compressed data structure for A * * b (input/output) double * On input, the right-hand side matrix of dimension (ldb, nrhs) * On output, the solution matrix * * ldb (input) int, leading dimension of the matrix B * * grid_handle (input) int array of size 8, holds a pointer to the process * grid structure, which is created and freed separately. * * berr (output) double, the backward error of each right-hand side * * factors (input/output) int array of size 8 * If iopt == 1, it is an output and contains the pointer pointing to * the structure of the factored matrices. * Otherwise, it it an input. * * info (output) int * */ superlu_options_t options; SuperLUStat_t stat; SuperMatrix A; ScalePermstruct_t *ScalePermstruct; LUstruct_t *LUstruct; int_t nprow, npcol; int iam; int report; int i; gridinfo_t *grid; factors_dist_t *LUfactors; /* * Set option for printing statistics. * report = 0: no reporting * report = 1: reporting */ report = 0; /* Locate the process grid. */ grid = (gridinfo_t *) grid_handle[0]; iam = (*grid).iam; nprow = (int_t) grid->nprow; npcol = (int_t) grid->npcol; if ( *iopt == 1 ) { /* LU decomposition */ if ( !iam ) printf(".. Process grid: %d X %d\n", nprow, npcol); /* Initialize the statistics variables. */ PStatInit(&stat); dCreate_CompCol_Matrix_dist(&A, *n, *n, *nnz, values, rowind, colptr, SLU_NC, SLU_D, SLU_GE); /* Set options. */ set_default_options(&options); /* Initialize ScalePermstruct and LUstruct. 
*/ ScalePermstruct = (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t)); ScalePermstructInit(*n, *n, ScalePermstruct); LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t)); LUstructInit(*n, *n, LUstruct); /* Call global routine with nrhs=0 to perform the factorization. */ pdgssvx_ABglobal(&options, &A, ScalePermstruct, NULL, *ldb, 0, grid, LUstruct, berr, &stat, info); if ( *info == 0 ) { if ( report == 1 ) PStatPrint(&options, &stat, grid); } else { printf("pdgssvx_ABglobal() error returns INFO= %d\n", *info); } /* Save the LU factors in the factors handle */ LUfactors = (factors_dist_t*) SUPERLU_MALLOC(sizeof(factors_dist_t)); LUfactors->ScalePermstruct = ScalePermstruct; LUfactors->LUstruct = LUstruct; factors[0] = (int) LUfactors; /* Free un-wanted storage */ Destroy_SuperMatrix_Store_dist(&A); PStatFree(&stat); } else if ( *iopt == 2 ) { /* Factor a modified matrix with the same sparsity pattern using existing permutations and L U storage */ /* Extract the LU factors in the factors handle */ LUfactors = (factors_dist_t*) factors[0]; ScalePermstruct = LUfactors->ScalePermstruct; LUstruct = LUfactors->LUstruct; PStatInit(&stat); /* Reset SuperMatrix pointers. */ dCreate_CompCol_Matrix_dist(&A, *n, *n, *nnz, values, rowind, colptr, SLU_NC, SLU_D, SLU_GE); /* Set options. */ set_default_options(&options); options.Fact = SamePattern_SameRowPerm; /* Call the routine with nrhs=0 to perform the factorization. 
*/ pdgssvx_ABglobal(&options, &A, ScalePermstruct, NULL, *ldb, 0, grid, LUstruct, berr, &stat, info); if ( *info == 0 ) { if ( report == 1 ) PStatPrint(&options, &stat, grid); } else { printf("pdgssvx_ABglobal() error returns INFO= %d\n", *info); } /* Free un-wanted storage */ Destroy_SuperMatrix_Store_dist(&A); PStatFree(&stat); } else if ( *iopt == 3 ) { /* Triangular solve */ /* Extract the LU factors in the factors handle */ LUfactors = (factors_dist_t*) factors[0]; ScalePermstruct = LUfactors->ScalePermstruct; LUstruct = LUfactors->LUstruct; PStatInit(&stat); /* Reset SuperMatrix pointers. */ dCreate_CompCol_Matrix_dist(&A, *n, *n, *nnz, values, rowind, colptr, SLU_NC, SLU_D, SLU_GE); /* Set options. */ set_default_options(&options); options.Fact = FACTORED; /* Solve the system A*X=B, overwriting B with X. */ pdgssvx_ABglobal(&options, &A, ScalePermstruct, b, *ldb, *nrhs, grid, LUstruct, berr, &stat, info); /* Free un-wanted storage */ Destroy_SuperMatrix_Store_dist(&A); PStatFree(&stat); } else if ( *iopt == 4 ) { /* Free storage */ /* Free the LU factors in the factors handle */ LUfactors = (factors_dist_t*) factors[0]; Destroy_LU(*n, grid, LUfactors->LUstruct); LUstructFree(LUfactors->LUstruct); ScalePermstructFree(LUfactors->ScalePermstruct); SUPERLU_FREE(LUfactors->ScalePermstruct); SUPERLU_FREE(LUfactors->LUstruct); SUPERLU_FREE(LUfactors); } else { fprintf(stderr, "Invalid iopt=%d passed to c_fortran_pdgssvx_ABglobal()\n", *iopt); exit(-1); } } SuperLU_DIST_5.3.0/FORTRAN/superlu_c2f_dwrap.c0000644013363400111340000002311313233431301017541 0ustar xiaoyessg /*! @file * \brief C interface functions for the Fortran90 wrapper. * *
 * -- Distributed SuperLU routine (version 4.1) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * October 2012
 * April 5, 2015
 */

#include "superlu_ddefs.h"
#include "Cnames.h"

/* Kind of integer used to hold a C pointer on the Fortran side.
   The 64-bit "long long int" branch below is the one that is compiled;
   the 32-bit "int" variant is kept but disabled by "#if 0".
   The handle routines in this file cast pointers to and from fptr, so
   the chosen type must be wide enough to hold a pointer.
   If changed, be sure to change it in superlupara.f90 too */

#if 0
typedef int fptr;  /* 32-bit */
#else
typedef long long int fptr;  /* 64-bit */
#endif


/* some MPI implementations may require conversion between a Fortran
   communicator and a C communicator.  This routine is used to perform the
   conversion.  It may need different forms for different MPI libraries. */

/* NO_MPI2 should be defined on the compiler command line if the MPI
   library does not provide MPI_Comm_f2c */

MPI_Comm f2c_comm(int *f_comm)
{
#ifndef NO_MPI2

/* MPI 2 provides a standard way of doing this */
   return MPI_Comm_f2c((MPI_Fint)(*f_comm));
#else

/* will probably need some special cases here */
/* when in doubt, just return the input */
   return (MPI_Comm)(*f_comm);
#endif
}


/* functions that create memory for a struct and return a handle */

/* Obtain storage for one gridinfo_t from SUPERLU_MALLOC and return its
   address, converted to fptr, through *handle. */
void f_create_gridinfo_handle(fptr *handle)
{
   gridinfo_t *grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));

   *handle = (fptr) grid;
}

/* Obtain storage for one superlu_dist_options_t from SUPERLU_MALLOC and
   return its address, converted to fptr, through *handle. */
void f_create_options_handle(fptr *handle)
{
   superlu_dist_options_t *opts =
      (superlu_dist_options_t *) SUPERLU_MALLOC(sizeof(superlu_dist_options_t));

   *handle = (fptr) opts;
}

/* Obtain storage for one ScalePermstruct_t from SUPERLU_MALLOC and return
   its address, converted to fptr, through *handle. */
void f_create_ScalePerm_handle(fptr *handle)
{
   ScalePermstruct_t *scale_perm =
      (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t));

   *handle = (fptr) scale_perm;
}

/* Obtain storage for one LUstruct_t from SUPERLU_MALLOC and return its
   address, converted to fptr, through *handle. */
void f_create_LUstruct_handle(fptr *handle)
{
   LUstruct_t *lu = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));

   *handle = (fptr) lu;
}

/* Obtain storage for one SOLVEstruct_t from SUPERLU_MALLOC and return its
   address, converted to fptr, through *handle. */
void f_create_SOLVEstruct_handle(fptr *handle)
{
   SOLVEstruct_t *solve = (SOLVEstruct_t *) SUPERLU_MALLOC(sizeof(SOLVEstruct_t));

   *handle = (fptr) solve;
}

/* Obtain storage for one SuperMatrix from SUPERLU_MALLOC and return its
   address, converted to fptr, through *handle. */
void f_create_SuperMatrix_handle(fptr *handle)
{
   SuperMatrix *mat = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));

   *handle = (fptr) mat;
}

/* Obtain storage for one SuperLUStat_t from SUPERLU_MALLOC and return its
   address, converted to fptr, through *handle. */
void f_create_SuperLUStat_handle(fptr *handle)
{
   SuperLUStat_t *stat = (SuperLUStat_t *) SUPERLU_MALLOC(sizeof(SuperLUStat_t));

   *handle = (fptr) stat;
}

/* functions that free the memory allocated by the above functions */

/* Hand the address held in *handle to SUPERLU_FREE.  The handle value
   itself is not modified. */
void f_destroy_gridinfo_handle(fptr *handle)
{
   void *storage = (void *) *handle;

   SUPERLU_FREE(storage);
}

/* Hand the address held in *handle to SUPERLU_FREE.  The handle value
   itself is not modified. */
void f_destroy_options_handle(fptr *handle)
{
   void *storage = (void *) *handle;

   SUPERLU_FREE(storage);
}

/* Hand the address held in *handle to SUPERLU_FREE.  The handle value
   itself is not modified. */
void f_destroy_ScalePerm_handle(fptr *handle)
{
   void *storage = (void *) *handle;

   SUPERLU_FREE(storage);
}

/* Hand the address held in *handle to SUPERLU_FREE.  The handle value
   itself is not modified. */
void f_destroy_LUstruct_handle(fptr *handle)
{
   void *storage = (void *) *handle;

   SUPERLU_FREE(storage);
}

/* Hand the address held in *handle to SUPERLU_FREE.  The handle value
   itself is not modified. */
void f_destroy_SOLVEstruct_handle(fptr *handle)
{
   void *storage = (void *) *handle;

   SUPERLU_FREE(storage);
}

/* Hand the address held in *handle to SUPERLU_FREE.  The handle value
   itself is not modified. */
void f_destroy_SuperMatrix_handle(fptr *handle)
{
   void *storage = (void *) *handle;

   SUPERLU_FREE(storage);
}

/* Hand the address held in *handle to SUPERLU_FREE.  The handle value
   itself is not modified. */
void f_destroy_SuperLUStat_handle(fptr *handle)
{
   void *storage = (void *) *handle;

   SUPERLU_FREE(storage);
}

/* functions that get or set values in a C struct.
   This is not the complete set of structs for which a user might want
   to get/set a component, and there may be missing components. */

/* Copy the iam, npcol and nprow members of the gridinfo_t referenced by
   *grid into the caller's variables. */
void f_get_gridinfo(fptr *grid, int *iam, int_t *nprow, int_t *npcol)
{
  const gridinfo_t *g = (const gridinfo_t *) *grid;

  *iam = g->iam;
  *npcol = g->npcol;
  *nprow = g->nprow;
}

/* Report the nrow and ncol members of the SuperMatrix referenced by *A. */
void f_get_SuperMatrix(fptr *A, int_t *nrow, int_t *ncol)
{
   const SuperMatrix *mat = (const SuperMatrix *) *A;

   *nrow = mat->nrow;
   *ncol = mat->ncol;
}

/* Overwrite the nrow and ncol members of the SuperMatrix referenced by *A
   with *nrow and *ncol. */
void f_set_SuperMatrix(fptr *A, int_t *nrow, int_t *ncol)
{
   SuperMatrix *mat = (SuperMatrix *) *A;

   mat->nrow = *nrow;
   mat->ncol = *ncol;
}

/* Read the dimensions of the SuperMatrix referenced by *A together with
   the m_loc, nnz_loc and fst_row members of its Store, which is accessed
   as an NRformat_loc. */
void f_get_CompRowLoc_Matrix(fptr *A, int_t *m, int_t *n, int_t *nnz_loc,
			     int_t *m_loc, int_t *fst_row)
{
  const SuperMatrix *mat = (const SuperMatrix *) *A;
  const NRformat_loc *store;

  *m = mat->nrow;
  *n = mat->ncol;

  store = (const NRformat_loc *) mat->Store;
  *m_loc = store->m_loc;
  *nnz_loc = store->nnz_loc;
  *fst_row = store->fst_row;
}

/* Write the dimensions of the SuperMatrix referenced by *A together with
   the m_loc, nnz_loc and fst_row members of its Store, which is accessed
   as an NRformat_loc. */
void f_set_CompRowLoc_Matrix(fptr *A, int_t *m, int_t *n, int_t *nnz_loc,
			     int_t *m_loc, int_t *fst_row)
{
  SuperMatrix *mat = (SuperMatrix *) *A;
  NRformat_loc *store;

  mat->nrow = *m;
  mat->ncol = *n;

  store = (NRformat_loc *) mat->Store;
  store->m_loc = *m_loc;
  store->nnz_loc = *nnz_loc;
  store->fst_row = *fst_row;
}

/* Copy eleven members of the superlu_dist_options_t referenced by *opt
   into the caller's int variables; each member is converted with an
   explicit cast to int. */
void f_get_superlu_options(fptr *opt, int *Fact, int *Equil, int *ParSymbFact,
                           int *ColPerm, int *RowPerm, int *IterRefine,
			   int *Trans, int *ReplaceTinyPivot,
			   int *SolveInitialized, int *RefineInitialized,
			   int *PrintStat)
{
   const superlu_dist_options_t *src = (const superlu_dist_options_t *) *opt;

   *Fact              = (int) src->Fact;
   *Equil             = (int) src->Equil;
   *ParSymbFact       = (int) src->ParSymbFact;
   *ColPerm           = (int) src->ColPerm;
   *RowPerm           = (int) src->RowPerm;
   *IterRefine        = (int) src->IterRefine;
   *Trans             = (int) src->Trans;
   *ReplaceTinyPivot  = (int) src->ReplaceTinyPivot;
   *SolveInitialized  = (int) src->SolveInitialized;
   *RefineInitialized = (int) src->RefineInitialized;
   *PrintStat         = (int) src->PrintStat;
}

/* Store the eleven int arguments into the corresponding members of the
   superlu_dist_options_t referenced by *opt, casting each value to the
   member's enumeration type. */
void f_set_superlu_options(fptr *opt, int *Fact, int *Equil, int *ParSymbFact,
                           int *ColPerm, int *RowPerm, int *IterRefine,
			   int *Trans, int *ReplaceTinyPivot,
			   int *SolveInitialized, int *RefineInitialized,
			   int *PrintStat)
{
   superlu_dist_options_t *dst = (superlu_dist_options_t *) *opt;

   dst->Fact              = (fact_t) *Fact;
   dst->Equil             = (yes_no_t) *Equil;
   dst->ParSymbFact       = (yes_no_t) *ParSymbFact;
   dst->ColPerm           = (colperm_t) *ColPerm;
   dst->RowPerm           = (rowperm_t) *RowPerm;
   dst->IterRefine        = (IterRefine_t) *IterRefine;
   dst->Trans             = (trans_t) *Trans;
   dst->ReplaceTinyPivot  = (yes_no_t) *ReplaceTinyPivot;
   dst->SolveInitialized  = (yes_no_t) *SolveInitialized;
   dst->RefineInitialized = (yes_no_t) *RefineInitialized;
   dst->PrintStat         = (yes_no_t) *PrintStat;
}

/* wrappers for SuperLU functions */

/* Pass the options structure referenced by *options to
   set_default_options_dist. */
void f_set_default_options(fptr *options)
{
   superlu_dist_options_t *opts = (superlu_dist_options_t *) *options;

   set_default_options_dist(opts);
}

/* Call superlu_gridinit on the gridinfo_t referenced by *grid, using the
   communicator obtained from f2c_comm(Bcomm) and the *nprow by *npcol
   process-grid shape. */
void f_superlu_gridinit(int *Bcomm, int_t *nprow, int_t *npcol, fptr *grid)
{
   gridinfo_t *g = (gridinfo_t *) *grid;
   MPI_Comm base_comm = f2c_comm(Bcomm);

   superlu_gridinit(base_comm, *nprow, *npcol, g);
}

/* Call superlu_gridmap on the gridinfo_t referenced by *grid, using the
   communicator obtained from f2c_comm(Bcomm); usermap and *ldumap are
   forwarded unchanged. */
void f_superlu_gridmap(int *Bcomm, int_t *nprow, int_t *npcol,
                       int_t *usermap, int_t *ldumap,
                       fptr *grid)
{
   gridinfo_t *g = (gridinfo_t *) *grid;
   MPI_Comm base_comm = f2c_comm(Bcomm);

   superlu_gridmap(base_comm, *nprow, *npcol, usermap, *ldumap, g);
}

/* Call superlu_gridexit on the gridinfo_t referenced by *grid. */
void f_superlu_gridexit(fptr *grid)
{
   gridinfo_t *g = (gridinfo_t *) *grid;

   superlu_gridexit(g);
}

/* Call ScalePermstructInit with *m, *n and the structure referenced by
   *ScalePermstruct. */
void f_ScalePermstructInit(int_t *m, int_t *n, fptr *ScalePermstruct)
{
   ScalePermstruct_t *scale_perm = (ScalePermstruct_t *) *ScalePermstruct;

   ScalePermstructInit(*m, *n, scale_perm);
}

/* Call ScalePermstructFree on the structure referenced by
   *ScalePermstruct. */
void f_ScalePermstructFree(fptr *ScalePermstruct)
{
   ScalePermstruct_t *scale_perm = (ScalePermstruct_t *) *ScalePermstruct;

   ScalePermstructFree(scale_perm);
}

/* Call PStatInit on the SuperLUStat_t referenced by *stat. */
void f_PStatInit(fptr *stat)
{
   SuperLUStat_t *statistics = (SuperLUStat_t *) *stat;

   PStatInit(statistics);
}

/* Call PStatFree on the SuperLUStat_t referenced by *stat. */
void f_PStatFree(fptr *stat)
{
   SuperLUStat_t *statistics = (SuperLUStat_t *) *stat;

   PStatFree(statistics);
}

/* Call LUstructInit on the LUstruct_t referenced by *LUstruct.
   Only *m is forwarded as the size argument; n is accepted but not used. */
void f_LUstructInit(int_t *m, int_t *n, fptr *LUstruct)
{
   extern void LUstructInit(const int_t, LUstruct_t *);
   LUstruct_t *lu = (LUstruct_t *) *LUstruct;

   LUstructInit(*m, lu);
}

/* Call LUstructFree on the LUstruct_t referenced by *LUstruct. */
void f_LUstructFree(fptr *LUstruct)
{
   extern void LUstructFree(LUstruct_t *);
   LUstruct_t *lu = (LUstruct_t *) *LUstruct;

   LUstructFree(lu);
}

/* Call Destroy_LU with *n and the grid and LU structures referenced by
   the two handles. */
void f_Destroy_LU(int_t *n, fptr *grid, fptr *LUstruct)
{
   gridinfo_t *g = (gridinfo_t *) *grid;
   LUstruct_t *lu = (LUstruct_t *) *LUstruct;

   Destroy_LU(*n, g, lu);
}

/* Call dCreate_CompRowLoc_Matrix_dist on the SuperMatrix referenced by *A.
   Scalar arguments are dereferenced, the three type codes are cast to
   Stype_t, Dtype_t and Mtype_t, and the array pointers are forwarded
   unchanged. */
void f_dCreate_CompRowLoc_Mat_dist(fptr *A, int_t *m, int_t *n, int_t *nnz_loc,
				   int_t *m_loc, int_t *fst_row, double *nzval,
				   int_t *colind, int_t *rowptr, int *stype,
				   int *dtype, int *mtype)
{
   SuperMatrix *mat = (SuperMatrix *) *A;
   Stype_t storage_type = (Stype_t) *stype;
   Dtype_t data_type = (Dtype_t) *dtype;
   Mtype_t math_type = (Mtype_t) *mtype;

   dCreate_CompRowLoc_Matrix_dist(mat, *m, *n, *nnz_loc, *m_loc, *fst_row,
                                  nzval, colind, rowptr,
                                  storage_type, data_type, math_type);
}

/* Call Destroy_CompRowLoc_Matrix_dist on the SuperMatrix referenced by
   *A. */
void f_Destroy_CompRowLoc_Mat_dist(fptr *A)
{
   SuperMatrix *mat = (SuperMatrix *) *A;

   Destroy_CompRowLoc_Matrix_dist(mat);
}

/* Call Destroy_SuperMatrix_Store_dist on the SuperMatrix referenced by
   *A. */
void f_Destroy_SuperMat_Store_dist(fptr *A)
{
   SuperMatrix *mat = (SuperMatrix *) *A;

   Destroy_SuperMatrix_Store_dist(mat);
}

/* Call dSolveFinalize with the options and solve structures referenced by
   the two handles. */
void f_dSolveFinalize(fptr *options, fptr *SOLVEstruct)
{
   superlu_dist_options_t *opts = (superlu_dist_options_t *) *options;
   SOLVEstruct_t *solve = (SOLVEstruct_t *) *SOLVEstruct;

   dSolveFinalize(opts, solve);
}

/* Resolve every handle to its C structure, call pdgssvx with them, and
   then call PStatPrint with the same options, statistics and grid.
   B, berr and info are forwarded as given; *ldb and *nrhs are passed by
   value. */
void f_pdgssvx(fptr *options, fptr *A, fptr *ScalePermstruct, double *B,
               int *ldb, int *nrhs, fptr *grid, fptr *LUstruct,
               fptr *SOLVEstruct, double *berr, fptr *stat, int *info)
{
    superlu_dist_options_t *opts = (superlu_dist_options_t *) *options;
    SuperMatrix *mat = (SuperMatrix *) *A;
    ScalePermstruct_t *scale_perm = (ScalePermstruct_t *) *ScalePermstruct;
    gridinfo_t *g = (gridinfo_t *) *grid;
    LUstruct_t *lu = (LUstruct_t *) *LUstruct;
    SOLVEstruct_t *solve = (SOLVEstruct_t *) *SOLVEstruct;
    SuperLUStat_t *statistics = (SuperLUStat_t *) *stat;

    pdgssvx(opts, mat, scale_perm, B, *ldb, *nrhs, g, lu, solve, berr,
            statistics, info);

    PStatPrint(opts, statistics, g);
}

/* Create the distributed matrix */

/* Call dcreate_dist_matrix on the SuperMatrix and grid referenced by the
   handles, passing the dimensions by value and the arrays as given.
   The int result of dcreate_dist_matrix is discarded. */
void f_dcreate_dist_matrix(fptr *A, int_t *m, int_t *n, int_t *nnz,
			   double *nzval, int_t *rowind, int_t *colptr,
			   fptr *grid)
{
   int dcreate_dist_matrix(SuperMatrix *, int_t, int_t, int_t, double *,
			   int_t * , int_t *, gridinfo_t *);
   SuperMatrix *mat = (SuperMatrix *) *A;
   gridinfo_t *g = (gridinfo_t *) *grid;

   dcreate_dist_matrix(mat, *m, *n, *nnz, nzval, rowind, colptr, g);
}

/* Check malloc */

/* When compiled with DEBUGlevel >= 1, invoke CHECK_MALLOC with *iam and the
   label "Check Malloc"; otherwise the function body is empty. */
void f_check_malloc(int *iam)
{
#if ( DEBUGlevel>=1 )
    int_t proc = (int_t) *iam;

    CHECK_MALLOC(proc, "Check Malloc");
#endif
}
SuperLU_DIST_5.3.0/FORTRAN/c_fortran_slugrid.c0000644013363400111340000000321513233431301017622 0ustar  xiaoyessg/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required 
approvals from U.S. Dept. of Energy) 

All rights reserved. 

The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
#include "superlu_ddefs.h"

#define HANDLE_SIZE 8

/* Split a pointer across the first two ints of a handle array.
 * handle[0] receives the low 32 bits and handle[1] the bits above them,
 * so no part of a 64-bit address is lost. */
static void
slugrid_store_pointer(int handle[], void *ptr)
{
    unsigned long long bits = (unsigned long long) (size_t) ptr;

    handle[0] = (int) (unsigned int) (bits & 0xffffffffULL);
    handle[1] = (int) (unsigned int) ((bits >> 32) & 0xffffffffULL);
}

/* Rebuild the pointer written by slugrid_store_pointer(). */
static void *
slugrid_load_pointer(const int handle[])
{
    unsigned long long low  = (unsigned long long) (unsigned int) handle[0];
    unsigned long long high = (unsigned long long) (unsigned int) handle[1];

    return (void *) (size_t) (((high & 0xffffffffULL) << 32)
                              | (low & 0xffffffffULL));
}

void
c_fortran_slugrid_(int *iopt, MPI_Comm *slu_comm, int *nprow, int *npcol,
		   int grid_handle[HANDLE_SIZE])
/*
 * This routine provides a fortran call for initializing and 
 * freeing the SuperLU_DIST processor grid.  The pointer for the grid
 * structure is returned in grid_handle.
 *
 * The input option, iopt, controls the functionality:
 *   iopt=1:  allocate and define a new process grid
 *   iopt=2:  free an existing process grid
 *   any other value prints a message to stderr and calls exit(-1)
 *
 * slu_comm is the base communication handle
 * nprow is the number of processors per process grid row
 * npcol is the number of processors per process grid column
 *
 * grid_handle stores the grid pointer in its first two elements:
 * grid_handle[0] holds the low 32 bits and grid_handle[1] the remaining
 * bits.  Storing only (int) grid would discard the upper half of a
 * 64-bit address, and the iopt=2 path would then free a wrong pointer.
 * The array must therefore provide at least two ints (it is declared
 * with HANDLE_SIZE elements).
 * NOTE(review): any other code that reads only grid_handle[0] still sees
 * just the low bits -- confirm such readers are updated for 64-bit use.
 */

{
    gridinfo_t *grid;

    if ( *iopt == 1 ) {
      /* Allocate the grid structure. */
      grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));
      if ( grid == NULL ) {
	  /* Fail explicitly instead of passing NULL to superlu_gridinit. */
	  fprintf(stderr, "Malloc fails for grid in c_fortran_slugrid()\n");
	  exit(-1);
      }

      /* Initialize the process grid. */
      superlu_gridinit(*slu_comm, *nprow, *npcol, grid);

      /* Set the handle passed from fortran, so that the
       * process grid can be reused. */
      slugrid_store_pointer(grid_handle, grid);

    } else if ( *iopt == 2 ) {
      /* Locate and free the process grid. */
      grid = (gridinfo_t *) slugrid_load_pointer(grid_handle);
      superlu_gridexit(grid);
      SUPERLU_FREE(grid);

    } else {
      fprintf(stderr, "Invalid iopt=%d passed to c_fortran_slugrid()\n", *iopt);
      exit(-1);
    }
}
SuperLU_DIST_5.3.0/FORTRAN/f_pddrive.f900000644013363400111340000001143213233431301016232 0ustar  xiaoyessg

!> @file
!! \brief The driver program to solve a linear system with default options.
!!
!! 
!! -- Distributed SuperLU routine (version 3.2) --
!! Lawrence Berkeley National Lab, Univ. of California Berkeley.
!! October, 2012
!! 
! program f_pddrive ! ! Purpose ! ======= ! ! The driver program F_PDDRIVE. ! ! This example illustrates how to use F_PDGSSVX with the full ! (default) options to solve a linear system. ! ! Seven basic steps are required: ! 1. Create C structures used in SuperLU_DIST ! 2. Initialize the MPI environment and the SuperLU process grid ! 3. Set up the input matrix and the right-hand side ! 4. Set the options argument ! 5. Call f_pdgssvx ! 6. Release the process grid and terminate the MPI environment ! 7. Release all structures ! ! use superlu_mod ! implicit none include 'mpif.h' integer maxn, maxnz, maxnrhs parameter ( maxn = 10000, maxnz = 100000, maxnrhs = 10 ) integer rowind(maxnz), colptr(maxn) real*8 values(maxnz), b(maxn), berr(maxnrhs) integer n, m, nnz, nprow, npcol, ldb, init integer*4 iam, info, i, ierr, ldb4, nrhs integer(superlu_ptr) :: grid integer(superlu_ptr) :: options integer(superlu_ptr) :: ScalePermstruct integer(superlu_ptr) :: LUstruct integer(superlu_ptr) :: SOLVEstruct integer(superlu_ptr) :: A integer(superlu_ptr) :: stat ! Initialize MPI environment call mpi_init(ierr) ! Check malloc ! call f_check_malloc(iam) ! Create Fortran handles for the C structures used in SuperLU_DIST call f_create_gridinfo_handle(grid) call f_create_options_handle(options) call f_create_ScalePerm_handle(ScalePermstruct) call f_create_LUstruct_handle(LUstruct) call f_create_SOLVEstruct_handle(SOLVEstruct) call f_create_SuperMatrix_handle(A) call f_create_SuperLUStat_handle(stat) ! Initialize the SuperLU_DIST process grid nprow = 2 npcol = 2 call f_superlu_gridinit(MPI_COMM_WORLD, nprow, npcol, grid) ! Bail out if I do not belong in the grid. call get_GridInfo(grid, iam=iam) if ( iam >= nprow * npcol ) then go to 100 endif if ( iam == 0 ) then write(*,*) ' Process grid ', nprow, ' X ', npcol endif ! Read Harwell-Boeing matrix, and adjust the pointers and indices ! to 0-based indexing, as required by C routines. 
if ( iam == 0 ) then open(file = "../EXAMPLE/g20.rua", status = "old", unit = 5) call dhbcode1(m, n, nnz, values, rowind, colptr) close(unit = 5) ! do i = 1, n+1 colptr(i) = colptr(i) - 1 enddo do i = 1, nnz rowind(i) = rowind(i) - 1 enddo endif ! Distribute the matrix to the process gird call f_dcreate_dist_matrix(A, m, n, nnz, values, rowind, colptr, grid) ! Setup the right hand side call get_CompRowLoc_Matrix(A, nrow_loc=ldb) do i = 1, ldb b(i) = 1.0 enddo nrhs = 1 ldb4 = ldb ! Set the default input options call f_set_default_options(options) ! Change one or more options ! call set_superlu_options(options,Fact=FACTORED) ! call set_superlu_options(options,ParSymbFact=YES) ! Initialize ScalePermstruct and LUstruct call get_SuperMatrix(A, nrow=m, ncol=n) call f_ScalePermstructInit(m, n, ScalePermstruct) call f_LUstructInit(m, n, LUstruct) ! Initialize the statistics variables call f_PStatInit(stat) ! Call the linear equation solver call f_pdgssvx(options, A, ScalePermstruct, b, ldb4, nrhs, & grid, LUstruct, SOLVEstruct, berr, stat, info) if (info == 0) then write (*,*) 'Backward error: ', (berr(i), i = 1, nrhs) else write(*,*) 'INFO from f_pdgssvx = ', info endif ! Deallocate the storage allocated by SuperLU_DIST call f_PStatFree(stat) call f_Destroy_CompRowLoc_Mat_dist(A) call f_ScalePermstructFree(ScalePermstruct) call f_Destroy_LU(n, grid, LUstruct) call f_LUstructFree(LUstruct) call get_superlu_options(options, SolveInitialized=init) if (init == YES) then call f_dSolveFinalize(options, SOLVEstruct) endif ! Release the SuperLU process grid 100 call f_superlu_gridexit(grid) ! Deallocate the C structures pointed to by the Fortran handles call f_destroy_gridinfo_handle(grid) call f_destroy_options_handle(options) call f_destroy_ScalePerm_handle(ScalePermstruct) call f_destroy_LUstruct_handle(LUstruct) call f_destroy_SOLVEstruct_handle(SOLVEstruct) call f_destroy_SuperMatrix_handle(A) call f_destroy_SuperLUStat_handle(stat) ! Check malloc ! 
call f_check_malloc(iam) ! Terminate the MPI execution environment call mpi_finalize(ierr) stop end SuperLU_DIST_5.3.0/FORTRAN/sp_ienv.c0000644013363400111340000000650713233431301015566 0ustar xiaoyessg/*! \file Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. The source code is distributed under BSD license, see the file License.txt at the top-level directory. */ /*! @file * \brief Chooses machine-dependent parameters for the local environment */ /* * File name: sp_ienv.c * History: Modified from lapack routine ILAENV */ #include "superlu_ddefs.h" #include "machines.h" /*! \brief
Purpose ======= sp_ienv_dist() is inquired to choose machine-dependent parameters for the local environment. See ISPEC for a description of the parameters. This version provides a set of parameters which should give good, but not optimal, performance on many of the currently available computers. Users are encouraged to modify this subroutine to set the tuning parameters for their particular machine using the option and problem size information in the arguments. Arguments ========= ISPEC (input) int Specifies the parameter to be returned as the value of SP_IENV_DIST. = 1: the panel size w; a panel consists of w consecutive columns of matrix A in the process of Gaussian elimination. The best value depends on machine's cache characters. = 2: the relaxation parameter relax; if the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of the their row structures. = 3: the maximum size for a supernode, which must be greater than or equal to relaxation parameter (see case 2); = 4: the minimum row dimension for 2-D blocking to be used; = 5: the minimum column dimension for 2-D blocking to be used; = 6: the estimated fills factor for the adjacency structures of L and U, compared with A; = 7: the minimum value of the product M*N*K for a GEMM call to be off-loaded to accelerator (e.g., GPU, Xeon Phi). (SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC < 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value. =====================================================================
*/ #include #include int_t sp_ienv_dist(int_t ispec) { // printf(" this function called\n"); int i; char* ttemp; switch (ispec) { #if ( MACH==CRAY_T3E ) case 2: return (6); case 3: return (30); #elif ( MACH==IBM ) case 2: return (20); case 3: return (100); #else case 2: ttemp = getenv("NREL"); if(ttemp) { return(atoi(ttemp)); } else return 1; case 3: ttemp = getenv("NSUP"); if(ttemp) { return(atoi(ttemp)); } else return 128; #endif case 6: return (5); case 7: ttemp = getenv ("N_GEMM"); if (ttemp) return atoi (ttemp); else return 10000; } /* Invalid value for ISPEC */ i = 1; xerr_dist("sp_ienv", &i); return 0; } /* sp_ienv_dist */ SuperLU_DIST_5.3.0/run_cmake_build.sh0000755013363400111340000000433413234132644016333 0ustar xiaoyessg#!/bin/bash ## if [ !$?NERSC_HOST ] if [ -z $NERSC_HOST ] then echo "NERSC_HOST undefined" elif [ "$NERSC_HOST" == "edison" ] then export PARMETIS_ROOT=~/Edison/lib/parmetis-4.0.3 export PARMETIS_BUILD_DIR=${PARMETIS_ROOT}/static-build/Linux-x86_64 cmake .. \ -DUSE_XSDK_DEFAULTS=FALSE\ -DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${PARMETIS_ROOT}/metis/include" \ -DTPL_PARMETIS_LIBRARIES="${PARMETIS_BUILD_DIR}/libparmetis/libparmetis.a;${PARMETIS_BUILD_DIR}/libmetis/libmetis.a" \ -DCMAKE_C_FLAGS="-std=c99 -fPIC" \ -DCMAKE_Fortran_COMPILER=ftn \ -Denable_blaslib=OFF \ # -DTPL_BLAS_LIBRARIES="-mkl" \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_INSTALL_PREFIX=. # -DCMAKE_EXE_LINKER_FLAGS="-shared" elif [ "$NERSC_HOST" == "cori" ] then export PARMETIS_ROOT=~/Cori/lib/parmetis-4.0.3 # export PARMETIS_BUILD_DIR=${PARMETIS_ROOT}/shared-build setenv PARMETIS_BUILD_DIR ${PARMETIS_ROOT}/static-build/Linux-x86_64 cmake .. 
\ -DUSE_XSDK_DEFAULTS=TRUE\ -DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${PARMETIS_ROOT}/metis/include" \ -DTPL_PARMETIS_LIBRARIES="${PARMETIS_BUILD_DIR}/libparmetis/libparmetis.a;${PARMETIS_BUILD_DIR}/libmetis/libmetis.a" \ -Denable_blaslib=OFF \ -DCMAKE_Fortran_COMPILER=ftn \ -DCMAKE_C_FLAGS="-std=c99 -fPIC" \ # -DCMAKE_EXE_LINKER_FLAGS="-shared" \ -DCMAKE_INSTALL_PREFIX=. fi THISHOST=`hostname -s` echo "host: $THISHOST" if [ "$THISHOST" == "ssg1" ] then rm -fr ssg1-build; mkdir ssg1-build; cd ssg1-build; # export PARMETIS_ROOT=~/lib/static/64-bit/parmetis-4.0.3 export PARMETIS_ROOT=~/lib/static/parmetis-4.0.3 export PARMETIS_BUILD_DIR=${PARMETIS_ROOT}/build/Linux-x86_64 echo "ParMetis root: $PARMETIS_ROOT" cmake .. \ -DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${PARMETIS_ROOT}/metis/include" \ -DTPL_PARMETIS_LIBRARIES="${PARMETIS_BUILD_DIR}/libparmetis/libparmetis.a;${PARMETIS_BUILD_DIR}/libmetis/libmetis.a" \ -DCMAKE_C_FLAGS="-std=c99 -g -DPRNTlevel=0 -DDEBUGlevel=0" \ -Denable_blaslib=OFF \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_C_COMPILER=mpicc \ -DCMAKE_INSTALL_PREFIX=. 
# -Denable_parmetislib=OFF\ # -DXSDK_INDEX_SIZE=64 \ fi # make VERBOSE=1 # make test SuperLU_DIST_5.3.0/make.inc0000644013363400111340000000276013234133023014252 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: March 1, 2016 version 5.0.0 # # Modified: October 13, 2017 version 5.2.1 # # ############################################################################ # # The name of the libraries to be created/linked to # SuperLUroot = /home/xiaoye/Dropbox/Codes/SuperLU/SuperLU_DIST_5.3.0/ssg1-build DSUPERLULIB = $(SuperLUroot)/SRC/libsuperlu_dist.a LIBS = $(DSUPERLULIB) /usr/lib/libf77blas.so /usr/lib/libatlas.so /home/xiaoye/lib/static/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.a /home/xiaoye/lib/static/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.a # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = /usr/bin/ar ARCHFLAGS = cr RANLIB = /usr/bin/ranlib CC = /home/xiaoye/mpich-install/bin/mpicc CFLAGS = -O3 -DNDEBUG -I/home/xiaoye/lib/static/parmetis-4.0.3/metis/include -I/home/xiaoye/lib/static/parmetis-4.0.3/include -DUSE_VENDOR_BLAS -fopenmp -std=c99 -g -DPRNTlevel=0 -DDEBUGlevel=0 # CFLAGS += -D # CFLAGS += #XSDK_INDEX_SIZE = 64 ## 64-bit integer NOOPTS = -O0 FORTRAN = /usr/bin/gfortran LOADER = $(CC) LOADOPTS = -Wl,-rpath, -Wl,-rpath -Wl,/home/xiaoye/mpich-install/lib -Wl,--enable-new-dtags -fopenmp SuperLU_DIST_5.3.0/.gitignore0000644013363400111340000000043313233431301014624 0ustar xiaoyessg*~ # You have to ignore this generated file or git will complain that it is an # unknown file! /make.inc # If the instructions are telling people to create this build dir under the # source tree, you had better put in an ignore for this. 
/build/ # Ignore Testing/ folder Testing/ SuperLU_DIST_5.3.0/License.txt0000644013363400111340000000322113233431301014755 0ustar xiaoyessgCopyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. (3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
SuperLU_DIST_5.3.0/make.inc.in0000644013363400111340000000220413233431301014647 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: March 1, 2016 version 5.0.0 # # Modified: October 13, 2017 version 5.2.1 # # ############################################################################ # # The name of the libraries to be created/linked to # SuperLUroot = ${CMAKE_INSTALL_PREFIX} DSUPERLULIB = $(SuperLUroot)/SRC/${PROJECT_NAME_LIB_EXPORT} LIBS = $(DSUPERLULIB) ${BLAS_LIB_EXPORT} ${PARMETIS_LIB_EXPORT} # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = @CMAKE_AR@ ARCHFLAGS = cr RANLIB = @CMAKE_RANLIB@ CC = @CMAKE_C_COMPILER@ CFLAGS = @CMAKE_C_FLAGS_RELEASE@ @CMAKE_C_FLAGS@ # CFLAGS += -D${DirDefs} # CFLAGS += @COMPILE_DEFINITIONS@ #XSDK_INDEX_SIZE = 64 ## 64-bit integer NOOPTS = -O0 FORTRAN = @CMAKE_Fortran_COMPILER@ LOADER = $(CC) LOADOPTS = -Wl,-rpath,@OpenMP_CXX_FLAGS@ @CMAKE_EXE_LINKER_FLAGS@ SuperLU_DIST_5.3.0/.travis.yml0000644013363400111340000000566013233431301014754 0ustar xiaoyessglanguage: cpp compiler: gcc os: linux sudo: required branches: only: - master notifications: slack: rooms: - ecpsparsesolvers:nBWC0jcAd7B1j9whHUYcaVJO on_failure: always on_success: never env: matrix: - TEST_NUMBER=1" - TEST_NUMBER=2" - TEST_NUMBER=3" - TEST_NUMBER=4" - TEST_NUMBER=5" - TEST_NUMBER=6" - TEST_NUMBER=7" - TEST_NUMBER=8" - TEST_NUMBER=9" - TEST_NUMBER=10" - TEST_NUMBER=11" - TEST_NUMBER=12" - TEST_NUMBER=13" - TEST_NUMBER=14" git: depth: 1 before_install: - export BLUE="\033[34;1m" - mkdir installDir - printf "${BLUE} GC; Installing gcc-6 via apt\n" - sudo apt-get update - sudo apt-get install build-essential software-properties-common -y - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y - sudo apt-get update - sudo apt-get install gcc-6 g++-6 
-y - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60 --slave /usr/bin/g++ g++ /usr/bin/g++-6 - export CXX="g++-6" - export CC="gcc-6" - printf "${BLUE} GC; Done installing gcc-6 via apt\n" - printf "${BLUE} GC; Installing gfortran via apt\n" - sudo apt-get install gfortran-6 -y - sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-6 60 - printf "${BLUE} GC; Done installing gfortran via apt\n" - printf "${BLUE} GC; Installing openmpi\n" - sudo apt-get install openmpi-bin libopenmpi-dev - printf "${BLUE} GC; Done installing openmpi\n" - printf "${BLUE} GC; Installing BLASfrom apt\n" - sudo apt-get install libblas-dev - export BLAS_LIB=/usr/lib/libblas/libblas.so - printf "${BLUE} GC; Done installing BLASfrom apt\n" - printf "${BLUE} GC; Installing ParMetis-4.0 from source\n" - cd $TRAVIS_BUILD_DIR/installDir - wget http://glaros.dtc.umn.edu/gkhome/fetch/sw/parmetis/parmetis-4.0.3.tar.gz - tar -xf parmetis-4.0.3.tar.gz - cd parmetis-4.0.3/ - mkdir install - make config shared=1 cc=mpicc cxx=mpic++ prefix=$PWD/install - make install > make_parmetis_install.log 2>&1 - printf "${BLUE} GC; Done installing ParMetis-4.0 from source\n" install: - export BLUE="\033[34;1m" - printf "${BLUE} GC; Installing superlu_dist from source\n" - cd $TRAVIS_BUILD_DIR - mkdir build - cd build - | cmake .. \ -DTPL_PARMETIS_INCLUDE_DIRS="$TRAVIS_BUILD_DIR/installDir/parmetis-4.0.3/metis/include;$TRAVIS_BUILD_DIR/installDir/parmetis-4.0.3/install/include" \ -DTPL_PARMETIS_LIBRARIES="$TRAVIS_BUILD_DIR/installDir/parmetis-4.0.3/install/lib/libparmetis.so" \ -DCMAKE_C_FLAGS="-std=c99 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0" \ -DTPL_BLAS_LIBRARIES="$BLAS_LIB" \ -Denable_blaslib=OFF \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_C_COMPILER=cc \ -DCMAKE_INSTALL_PREFIX=. 
\ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - make - make install - printf "${BLUE} GC; Done installing superlu_dist from source\n" script: - cd $TRAVIS_BUILD_DIR - ./.travis_tests.sh SuperLU_DIST_5.3.0/INSTALL/0000755013363400111340000000000013233431301013742 5ustar xiaoyessgSuperLU_DIST_5.3.0/INSTALL/Makefile0000644013363400111340000000121413233431301015400 0ustar xiaoyessginclude ../make.inc all: testdmach testsmach testtimer #install.out testdmach: dmach_dist.o dmachtst.o $(LOADER) $(LOADOPTS) -o testdmach dmach_dist.o dmachtst.o testsmach: smach_dist.o smachtst.o $(LOADER) $(LOADOPTS) -o testsmach smach_dist.o smachtst.o testtimer: superlu_timer.o timertst.o $(LOADER) $(LOADOPTS) -o testtimer superlu_timer.o timertst.o install.out: @echo Testing machines parameters and timer csh install.csh smach_dist.o: ../SRC/smach_dist.c ; $(CC) -c $< dmach_dist.o: ../SRC/dmach_dist.c ; $(CC) -c $< superlu_timer.o: ../SRC/superlu_timer.c ; $(CC) -c $< .c.o: $(CC) $(CFLAGS) -c $< clean: rm -f *.o test* *.out SuperLU_DIST_5.3.0/INSTALL/install.csh0000644013363400111340000000040413233431301016105 0ustar xiaoyessg#! /bin/csh set ofile = install.out # output file echo '---- SINGLE PRECISION' >! 
$ofile ./testsmach >> $ofile echo '' >> $ofile echo ---- DOUBLE PRECISION >> $ofile ./testdmach >> $ofile echo '' >> $ofile echo ---- TIMER >> $ofile ./testtimer >> $ofile SuperLU_DIST_5.3.0/INSTALL/smachtst.c0000644013363400111340000000234013233431301015733 0ustar xiaoyessg#include int main() { /* Test driver: requests each single-precision machine parameter from smach_dist() by descriptive name and prints the results. NOTE(review): the header name after #include is missing in this copy; printf needs stdio.h. */ /* Local variables */ float base, emin, prec, emax, rmin, rmax, t, sfmin; extern float smach_dist(char *); float rnd, eps; eps = smach_dist("Epsilon"); sfmin = smach_dist("Safe minimum"); base = smach_dist("Base"); prec = smach_dist("Precision"); t = smach_dist("Number of digits in mantissa"); rnd = smach_dist("Rounding mode"); /* NOTE(review): the selector below is misspelled (Minnimum); presumably harmless if smach_dist() looks only at the first character, as dmach_dist() does -- confirm. */ emin = smach_dist("Minnimum exponent"); rmin = smach_dist("Underflow threshold"); emax = smach_dist("Largest exponent"); rmax = smach_dist("Overflow threshold"); printf(" Epsilon = %e\n", eps); printf(" Safe minimum = %e\n", sfmin); printf(" Base = %.0f\n", base); printf(" Precision = %e\n", prec); printf(" Number of digits in mantissa = %.0f\n", t); printf(" Rounding mode = %.0f\n", rnd); printf(" Minimum exponent = %.0f\n", emin); printf(" Underflow threshold = %e\n", rmin); printf(" Largest exponent = %.0f\n", emax); printf(" Overflow threshold = %e\n", rmax); printf(" Reciprocal of safe minimum = %e\n", 1./sfmin); return 0; } SuperLU_DIST_5.3.0/INSTALL/dmachtst.c0000644013363400111340000000234313233431301015719 0ustar xiaoyessg#include int main() { /* Test driver: requests each double-precision machine parameter from dmach_dist() by descriptive name and prints the results. */ /* Local variables */ double base, emin, prec, emax, rmin, rmax, t, sfmin; extern double dmach_dist(char *); double rnd, eps; eps = dmach_dist("Epsilon"); sfmin = dmach_dist("Safe minimum"); base = dmach_dist("Base"); prec = dmach_dist("Precision"); t = dmach_dist("Number of digits in mantissa"); rnd = dmach_dist("Rounding mode"); /* The selector below is misspelled (Minnimum), but dmach_dist() compares only the first character, so it still matches M. */ emin = dmach_dist("Minnimum exponent"); rmin = dmach_dist("Underflow threshold"); emax = dmach_dist("Largest exponent"); rmax = dmach_dist("Overflow threshold"); printf(" Epsilon = %e\n", eps); printf(" Safe minimum = %e\n", sfmin); printf(" Base = %.0f\n", base); printf(" 
Precision = %e\n", prec); printf(" Number of digits in mantissa = %.0f\n", t); printf(" Rounding mode = %.0f\n", rnd); printf(" Minimum exponent = %.0f\n", emin); printf(" Underflow threshold = %e\n", rmin); printf(" Largest exponent = %.0f\n", emax); printf(" Overflow threshold = %e\n", rmax); /* sfmin is documented by dmach_dist() as the safe minimum whose reciprocal does not overflow, so this should print a finite value. */ printf(" Reciprocal of safe minimum = %e\n", 1./sfmin); return 0; } SuperLU_DIST_5.3.0/INSTALL/timertst.c0000644013363400111340000000346013233431301015764 0ustar xiaoyessg#include #include void mysub(int n, double *x, double *y) { /* Intentionally empty. Presumably exists so that x and y are handed to a function after the timing loops, discouraging the compiler from discarding that work -- TODO confirm. */ return; } int main(int argc, char *argv[]) { /* Timer test: times ITS passes of an NMAX-element y += alpha*x update with SuperLU_timer_dist_(), then repeats the run with one timer call per outer iteration to estimate the cost of a single timer call. */ /* Parameters */ #define NMAX 100 #define ITS 10000 int i, j; double alpha, avg, t1, t2, tnotim; double x[NMAX], y[NMAX]; extern double SuperLU_timer_dist_(); MPI_Init( &argc, &argv ); /* Initialize X and Y */ for (i = 0; i < NMAX; ++i) { x[i] = 1.0 / (double)(i+1); y[i] = (double)(NMAX - i) / (double)NMAX; } alpha = 0.315; /* Time 1,000,000 DAXPY operations (ITS * NMAX element updates, one multiply and one add each). The sign of alpha flips after every pass, so consecutive passes cancel and y stays bounded. */ t1 = SuperLU_timer_dist_(); for (j = 0; j < ITS; ++j) { for (i = 0; i < NMAX; ++i) y[i] += alpha * x[i]; alpha = -alpha; } t2 = SuperLU_timer_dist_(); printf("Time for 1,000,000 DAXPY ops = %10.3g seconds\n", t2-t1); if ( t2-t1 > 0. ) printf("DAXPY performance rate = %10.3g mflops\n", 2./(t2-t1)); else printf("*** Error: Time for operations was zero\n"); /* Baseline: elapsed time of the loop without any timer calls inside it. */ tnotim = t2 - t1; /* Time 1,000,000 DAXPY operations with SuperLU_timer_dist_() called once per outer-loop iteration */ t1 = SuperLU_timer_dist_(); for (j = 0; j < ITS; ++j) { for (i = 0; i < NMAX; ++i) y[i] += alpha * x[i]; alpha = -alpha; t2 = SuperLU_timer_dist_(); } /* Compute the time in milliseconds used by an average call to SuperLU_timer_dist_() (labelled DSECND in the output): the extra elapsed time over the baseline, divided by the ITS calls. */ printf("Including DSECND, time = %10.3g seconds\n", t2-t1); avg = ( (t2 - t1) - tnotim )*1000. / (double)ITS; printf("Average time for DSECND = %10.3g milliseconds\n", avg); /* Compute the equivalent number of floating point operations used by an average timer call. */ if ( tnotim > 0. 
) printf("Equivalent floating point ops = %10.3g ops\n", 1000.*avg / tnotim); /* NOTE(review): the rate printed earlier counts 2 flops per update (2e6 flops in tnotim seconds), which would give 2000.*avg/tnotim here; the factor 1000. counts element updates instead -- confirm which is intended. */ mysub(NMAX, x, y); MPI_Finalize(); return 0; } SuperLU_DIST_5.3.0/superlu_dist.pc.in0000644013363400111340000000063713233431301016315 0ustar xiaoyessgprefix=@CMAKE_INSTALL_PREFIX@ libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ includedir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@ Name: @CMAKE_PROJECT_NAME@ Description: Distributed-memory direct solution of sparse systems of linear equations Version: @PROJECT_VERSION@ URL: http://crd-legacy.lbl.gov/~xiaoye/SuperLU/ Libs: -L${libdir} -lsuperlu Libs.private: @BLAS_LIB@ -lm Cflags: -I${includedir} SuperLU_DIST_5.3.0/DoxyConfig0000644013363400111340000015631313233431301014641 0ustar xiaoyessg# Doxyfile 1.5.5 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded # by quotes) that should identify the project. PROJECT_NAME = SuperLU Distributed # The PROJECT_NUMBER tag can be used to enter a project or revision number. 
# This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 5.2.2 e # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = DOC # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, # Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, # Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, # and Ukrainian. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. 
# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = NO # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. 
STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful if your file system # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the DETAILS_AT_TOP tag is set to YES then Doxygen # will output the detailed description near the top, like JavaDoc. # If set to NO, the detailed description appears after the member # documentation. 
DETAILS_AT_TOP = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 8 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = YES # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. 
OPTIMIZE_OUTPUT_VHDL = NO # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. 
This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = YES # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespace are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. 
# If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. 
HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. 
This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or define consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and defines in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # If the sources in your project are distributed over multiple directories # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy # in the documentation. The default is NO. SHOW_DIRECTORIES = NO # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. 
FILE_VERSION_FILTER = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. 
If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = SRC/ EXAMPLE/ FORTRAN/ TEST/ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded # from the input. 
EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where <filter> # is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. 
Doxygen will then use the output that the filter program writes # to standard output. If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER # is applied to all files. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES (the default) # then for each documented function all documented # functions referencing it will be listed. 
REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES (the default) # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen's # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = NO # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. 
# The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, # files or namespaces will be aligned in HTML using tables. If set to # NO a bullet list will be used. 
HTML_ALIGN_MEMBERS = YES # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. For this to work a browser that supports # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. 
You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # The DISABLE_INDEX tag can be used to turn on/off the condensed index at # top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. DISABLE_INDEX = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # If the GENERATE_TREEVIEW tag is set to YES, a side panel will be # generated containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, # Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are # probably better off using the HTML help feature. GENERATE_TREEVIEW = NO # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. 
TREEVIEW_WIDTH = 250 #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf).
The pdf file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output. # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields.
# Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load stylesheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. 
PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. This is useful # if you want to understand what is going on. On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the include files # in the INCLUDE_PATH (see below) will be searched if a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor.
INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all function-like macros that are alone # on a line, have an all uppercase name, and do not end with a semicolon. Such # function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. # Optionally an initial location of the external documentation # can be added for each tagfile. The format of a tag file without # this location is as follows: # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # TAGFILES = file1=loc1 "file2 = loc2" ... 
# where "loc1" and "loc2" can be relative or absolute paths or # URLs. If a location is present for each tag, the installdox tool # does not have to be run to correct the links. # Note that each tag file must have a unique name # (where the name does NOT include the path) # If a tag file is not located in the directory in which doxygen # is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. 
The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default). HAVE_DOT = YES # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct group dependencies. GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files.
INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = YES # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = YES # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif. # If left blank png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path.
DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lie farther from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is enabled by default, which results in a transparent # background. Warning: Depending on the platform used, enabling this option # may lead to badly anti-aliased labels on the edges of a graph (i.e. they # become hard to read). DOT_TRANSPARENT = YES # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default.
DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES #--------------------------------------------------------------------------- # Configuration::additions related to the search engine #--------------------------------------------------------------------------- # The SEARCHENGINE tag specifies whether or not a search engine should be # used. If set to NO the values of all tags below this one will be ignored. SEARCHENGINE = NO SuperLU_DIST_5.3.0/run_cmake_build.csh0000755013363400111340000000410313233431301016457 0ustar xiaoyessg#!/bin/csh if ( ! $?NERSC_HOST ) then echo "NERSC_HOST undefined" else if ( "$NERSC_HOST" == "edison" ) then setenv PARMETIS_ROOT ~/Edison/lib/parmetis-4.0.3 # setenv PARMETIS_BUILD_DIR ${PARMETIS_ROOT}/shared-build setenv PARMETIS_BUILD_DIR ${PARMETIS_ROOT}/static-build/Linux-x86_64 cmake .. \ -DUSE_XSDK_DEFAULTS=FALSE\ -DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${PARMETIS_ROOT}/metis/include" \ -DTPL_PARMETIS_LIBRARIES="${PARMETIS_BUILD_DIR}/libparmetis/libparmetis.a;${PARMETIS_BUILD_DIR}/libmetis/libmetis.a" \ -DCMAKE_C_FLAGS="-std=c99 -fPIC" \ # -DCMAKE_EXE_LINKER_FLAGS="-shared" \ -DCMAKE_Fortran_COMPILER=ftn \ -Denable_blaslib=OFF \ # -DTPL_BLAS_LIBRARIES=" " \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_INSTALL_PREFIX=.. endif if ( "$NERSC_HOST" == "cori" ) then setenv PARMETIS_ROOT ~/Cori/lib/parmetis-4.0.3 setenv PARMETIS_BUILD_DIR ${PARMETIS_ROOT}/shared-build # setenv PARMETIS_BUILD_DIR ${PARMETIS_ROOT}/static-build/Linux-x86_64 cmake .. 
\ -DUSE_XSDK_DEFAULTS=TRUE\ -DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${PARMETIS_ROOT}/metis/include" \ -DTPL_PARMETIS_LIBRARIES="${PARMETIS_BUILD_DIR}/libparmetis/libparmetis.so;${PARMETIS_BUILD_DIR}/libmetis/libmetis.so" \ -Denable_blaslib=OFF \ -DCMAKE_Fortran_COMPILER=ftn \ -DCMAKE_C_FLAGS="-std=c99 -fPIC" \ -DCMAKE_EXE_LINKER_FLAGS="-shared" \ -DCMAKE_INSTALL_PREFIX=.. endif endif set THISHOST=`hostname -s` #echo $THISHOST if ( "$THISHOST" == "ssg1" ) then setenv PARMETIS_ROOT ~/lib/static/parmetis-4.0.3 setenv PARMETIS_BUILD_DIR ${PARMETIS_ROOT}/build/Linux-x86_64 echo $PARMETIS_ROOT cmake .. \ -DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${PARMETIS_ROOT}/metis/include" \ -DTPL_PARMETIS_LIBRARIES="${PARMETIS_BUILD_DIR}/libparmetis/libparmetis.a;${PARMETIS_BUILD_DIR}/libmetis/libmetis.a" \ -DCMAKE_C_FLAGS="-std=c99 -g" \ -Denable_blaslib=OFF \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_C_COMPILER=mpicc \ -DCMAKE_INSTALL_PREFIX=.. endif # make VERBOSE=1 # make test SuperLU_DIST_5.3.0/MAKE_INC/0000755013363400111340000000000013233431301014042 5ustar xiaoyessgSuperLU_DIST_5.3.0/MAKE_INC/make.ssg10000644013363400111340000000272013233431301015557 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: March 1, 2016 version 5.0.0 # # Modified: # # ############################################################################ # # The name of the libraries to be created/linked to # SuperLUroot = /home/xiaoye/Dropbox/Codes/SuperLU/superlu_dist.git DSUPERLULIB = $(SuperLUroot)/lib/libsuperlu_dist.a LIBS = $(DSUPERLULIB) /usr/lib/libf77blas.so /usr/lib/libatlas.so /home/xiaoye/lib/static/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.a /home/xiaoye/lib/static/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.a # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, 
set RANLIB = echo. # ARCH = /usr/bin/ar ARCHFLAGS = cr RANLIB = /usr/bin/ranlib CC = /home/xiaoye/mpich-install/bin/mpicc CFLAGS = -O3 -DNDEBUG -I/home/xiaoye/lib/static/parmetis-4.0.3/metis/include -I/home/xiaoye/lib/static/parmetis-4.0.3/include -fopenmp -DUSE_VENDOR_BLAS -DDEBUGlevel=0 -DPRNTlevel=0 -std=c99 -g #XSDK_INDEX_SIZE = 64 ## 64-bit integer NOOPTS = -O0 FORTRAN = /usr/bin/gfortran LOADER = $(CC) LOADOPTS = -Wl,-rpath,/home/xiaoye/Dropbox/Codes/SuperLU/superlu_dist.git/lib -Wl,-rpath -Wl,/home/xiaoye/mpich-install/lib -Wl,--enable-new-dtags -fopenmp SuperLU_DIST_5.3.0/MAKE_INC/make.xc300000644013363400111340000000557613233431301015473 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # September 1, 2011 version 3.0 # ############################################################################ # # The machine (platform) identifier to append to the library names # ## edison at NERSC PLAT = _xc30 VERSION = 5.0.0 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_${VERSION} DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_${VERSION}.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = # ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Edison/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Edison/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required 
Fortran libraries, if you use C compiler to link # FLIBS = -lpgf90 -lpgf90_rpm1 ## for PGI compiler #FLIBS = -lifport -lifcore ## for Intel compiler # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = cc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -fast -m64 -std=c99 -Wall -openmp \ $(I_PARMETIS) -DDEBUGlevel=0 -DPRNTlevel=0 -DPROFlevel=0 \ # uncomment the following to use 64-bit integer #XSDK_INDEX_SIZE = 64 ## 64-bit integer # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 -std=c99 ############################################################################ # FORTRAN compiler setup FORTRAN = ftn F90FLAGS = -fast #-Mipa=fast,safe # uncomment the following to use 64-bit integer # F90FLAGS += -i8 ############################################################################ LOADER = $(CC) LOADOPTS = -openmp ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. 
# CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.xt4_pathscale0000644013363400111340000000477213233431301017456 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _xt4 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = -lpathfortran # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = cc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -Ofast $(I_PARMETIS) -DDEBUGlevel=0 -DPRNTlevel=1 -DPROFlevel=0 # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 -ipa ############################################################################ # FORTRAN compiler setup FORTRAN = ftn F90FLAGS = -Ofast ############################################################################ LOADER = cc LOADOPTS = -ipa ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. # CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.altix0000644013363400111340000000522013233431301016021 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: April 10, 2006 version 2.0 # # Modified: November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _altix # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a MKLHOME = /usr/common/intel/mkl/8.1.014 BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = -L${MKLHOME}/lib/64 -lmkl_ipf -lguide ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := 
-L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) \ -lmpi -lm -L/usr/common/intel/fc/8.1.029/lib -lifcore # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = ar ARCHFLAGS = crv RANLIB = ranlib ####################################################################### # C compiler setup CC = icc ISA = -ftz -mp # CFLAGS should be set to be the C flags that include optimization CFLAGS = $(ISA) $(I_PARMETIS) -O3 -DDEBUGlevel=0 -DPRNTlevel=0 # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = $(ISA) -O0 ############################################################################ # FORTRAN compiler setup FORTRAN = ifort FFLAGS = $(CFLAGS) F90FLAGS = -r8 -check all -save -Dmpi -ftz ############################################################################ LOADER = icc LOADOPTS = $(CFLAGS) # ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. 
# CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.mac-x0000644013363400111340000000265013233431301015711 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: March 1, 2016 version 5.0.0 # # Modified: # # ############################################################################ # # The name of the libraries to be created/linked to # SuperLUroot = /Users/xsli/Dropbox/Codes/SuperLU/superlu_dist.git/ DSUPERLULIB = $(SuperLUroot)/lib/libsuperlu_dist.a BLASLIB = $(SuperLUroot)/CBLAS/libblas.a LIBS = $(DSUPERLULIB) ${BLASLIB} /Users/xsli/lib/parmetis-4.0.3/build/Darwin-x86_64/libparmetis/libparmetis.a /Users/xsli/lib/parmetis-4.0.3/build/Darwin-x86_64/libmetis/libmetis.a # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = /usr/bin/ar ARCHFLAGS = cr RANLIB = /usr/bin/ranlib CC = /Users/xsli/lib/mpich2-install/bin/mpicc CFLAGS = -O3 -DNDEBUG -I/Users/xsli/lib/parmetis-4.0.3/metis/include -I/Users/xsli/lib/parmetis-4.0.3/include -DDEBUGlevel=0 -DPRNTlevel=0 -std=c99 -g #CFLAGS += -openmp #XSDK_INDEX_SIZE = 64 ## 64-bit integer # CFLAGS += NOOPTS = -O0 FORTRAN = /usr/local/bin/gfortran LOADER = $(CC) LOADOPTS = -openmp # LOADOPTS = -Wl,-rpath,/Users/xsli/Dropbox/Codes/SuperLU/superlu_dist.git/xsli-build/lib SuperLU_DIST_5.3.0/MAKE_INC/make.mpich0000644013363400111340000000301513233431301016000 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: March 1, 2016 version 5.0.0 # # Modified: # # ############################################################################ # # The name of the libraries to be created/linked to # VERSION = 5.1.3 SuperLUroot = /home/xiaoye/Dropbox/Codes/SuperLU/SuperLU_DIST_${VERSION} 
DSUPERLULIB = $(SuperLUroot)/lib/libsuperlu_dist.a # BLASDEF = -DUSE_VENDOR_BLAS PARMETIS_DIR := ${HOME}/lib/static/parmetis-4.0.3 I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis LIBS = $(DSUPERLULIB) /usr/lib/libf77blas.so /usr/lib/libatlas.so \ ${PARMETISLIB} ${METISLIB} # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = /usr/bin/ar ARCHFLAGS = cr RANLIB = /usr/bin/ranlib CC = /home/xiaoye/mpich-install/bin/mpicc CFLAGS = -DNDEBUG -DUSE_VENDOR_BLAS -DAdd_ -DDEBUGlevel=0 -DPRNTlevel=0 -std=c99 -fPIC -g ${I_PARMETIS} #XSDK_INDEX_SIZE = 64 ## 64-bit integer # CFLAGS += NOOPTS = -O0 FORTRAN = /usr/bin/gfortran LOADER = $(CC) LOADOPTS = -Wl,-rpath=/home/xiaoye/Dropbox/Codes/SuperLU/SuperLU_DIST_${VERSION}/lib -g # -Wl,-Bdynamic SuperLU_DIST_5.3.0/MAKE_INC/make.opteron0000755013363400111340000000511313233431301016372 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _jacquard # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = -L/usr/common/usg/acml/2.6.0/pathscale64/lib -lacml -lacml_mv #MPILIB = -L/usr/lpp/ppe.poe/lib -lmpi_r ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR 
:= ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = mpicc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -O3 -DDEBUGlevel=0 -DPRNTlevel=1 ${I_PARMETIS} # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 ############################################################################ # FORTRAN compiler setup FORTRAN = mpif90 F90FLAGS = -O3 ############################################################################ LOADER = mpif90 # 32-bit: LOADOPTS = ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. 
# CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.xt4.64bit0000644013363400111340000000503413233431301016351 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _xt4 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = -lpgf90 -lpgf90_rpm1 -lpgf902 -lpgf90rtl -lpgftnrtl # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = cc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -fastsse $(I_PARMETIS) -DDEBUGlevel=0 -DPRNTlevel=1 -D_LONGINT # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 ############################################################################ # FORTRAN compiler setup FORTRAN = ftn F90FLAGS = -fastsse -i8 ############################################################################ LOADER = cc LOADOPTS = ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. # CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.sp0000644013363400111340000000536113233431301015330 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _sp # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = -lessl #MPILIB = -L/usr/lpp/ppe.poe/lib -lmpi #PERFLIB = -L/vol1/VAMPIR/lib -lVT ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := 
-L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ CC = mpcc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -D_SP -O3 -qarch=PWR3 -qalias=allptrs \ -DDEBUGlevel=0 -DPRNTlevel=0 $(I_PARMETIS) # # NOOPTS should be set to be the C flags that turn off any optimization # This must be enforced to compile the two routines: slamch.c and dlamch.c. NOOPTS = ############################################################################ FORTRAN = mpxlf90 F90FLAGS = -WF,-Dsp -O3 -Q -qstrict -qfixed -qinit=f90ptr -qarch=pwr3 ############################################################################ LOADER = mpxlf90 #LOADOPTS = -bmaxdata:0x80000000 LOADOPTS = -bmaxdata:0x70000000 # ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. 
# CDEFS = -DNoChange SuperLU_DIST_5.3.0/MAKE_INC/make.i386_linux0000644013363400111340000000514513233431301016616 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # # January 18, 2006 Sam Adams # General Dynamics - Network Systems # works for i386 Linux, with LAM-MPI 7.1.1 and GCC 4. # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _i386 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_5.1.3 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = /usr/lib/libblas.so.3 ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = mpicc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -pipe -O2 ${I_PARMETIS} # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = ############################################################################ # FORTRAN compiler setup FORTRAN = mpif77 F90FLAGS = ############################################################################ LOADER = mpif77 LOADOPTS = ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. # CDEFS = -DAdd__ SuperLU_DIST_5.3.0/MAKE_INC/make.t3e0000644013363400111340000000420713233431301015377 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1997 version alpha # # Modified: September 1, 1999 version 1.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _t3e # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS # #PERFLIB = -l pat pat.cld #PERFLIB = -lapp METISLIB = PARMETISLIB = # Define the required Fortran libraries, if you use C compiler to link FLIBS = # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = cc # CFLAGS should be set to be the C flags that include optimization #CFLAGS = -D_CRAY -DPRNTlevel=1 -O3 -h aggress,split,unroll CFLAGS = -O3 -D_CRAY -DPRNTlevel=0 -DDEBUGlevel=0 -DPROFlevel=0 # -happrentice,inline0 PTROPT = -h restrict=a # # NOOPTS should be set to be the C flags that turn off any optimization # This must be enforced to compile the two routines: slamch.c and dlamch.c. NOOPTS = ############################################################################ # FORTRAN compiler setup FORTRAN = f90 F90FLAGS = -O3 -dp -i 32 ############################################################################ LOADER = cc LOADOPTS = # ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. # CDEFS = -DUpCase SuperLU_DIST_5.3.0/MAKE_INC/make.xe60000644013363400111340000000525513233431301015412 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # September 1, 2011 version 3.0 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _xe6 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.3 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.3.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = # ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Edison/lib/parmetis-4.0.3 ## 
parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Edison/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = -lpgf90 -lpgf90_rpm1 ## for PGI compiler # FLIBS = -lifport -lifcore ## for Intel compiler # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = cc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -fast -Mipa=fast,safe -m64 $(I_PARMETIS) \ -DDEBUGlevel=0 -DPRNTlevel=1 -DPROFlevel=0 \ # -D_LONGINT # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 ############################################################################ # FORTRAN compiler setup FORTRAN = ftn F90FLAGS = -fast -Mipa=fast,safe ############################################################################ LOADER = $(CC) LOADOPTS = -fast ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. 
# CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.xt40000644013363400111340000000415613233431301015426 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _xt4 VERSION = 5.0.0 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_${VERSION} DSUPERLULIB = $(DSuperLUroot)/lib/libsuper_dist_${VERSION}.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = METISLIB = -L/usr/common/usg/parmetis/3.1 -lmetis PARMETISLIB = -L/usr/common/usg/parmetis/3.1 -lparmetis # Define the required Fortran libraries, if you use C compiler to link FLIBS = -lpgf90 -lpgf90_rpm1 -lpgf902 -lpgf90rtl -lpgftnrtl # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = cc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -fastsse -DDEBUGlevel=0 -DPRNTlevel=1 # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 ############################################################################ # FORTRAN compiler setup FORTRAN = ftn F90FLAGS = -fastsse ############################################################################ LOADER = cc LOADOPTS = ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. # CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.xt50000644013363400111340000000520413233431301015422 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _xt5 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.3 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.3.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS 
:= -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = -lpgf90 -lpgf90_rpm1 -lpgf902 -lpgf90rtl -lpgftnrtl # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # Include directories for header files INCS = ${I_PARMETIS} # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = cc INCS = $(I_PARMETIS) # CFLAGS should be set to be the C flags that include optimization CFLAGS = ${INCS} -c99 -fastsse -DDEBUGlevel=0 -DPRNTlevel=1 #XSDK_INDEX_SIZE = 64 ## 64-bit integer # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 ############################################################################ # FORTRAN compiler setup FORTRAN = ftn F90FLAGS = -fastsse #-i8 ############################################################################ LOADER = cc LOADOPTS = ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. 
# CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.cuda_gpu0000644013363400111340000000567313233431301016503 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # September 1, 2011 version 3.0 # October 1, 2014 version 4.0 # ############################################################################ # # The machine (platform) identifier to append to the library names # Carver: Intel compiler PLAT = _sp # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = ${MKL} # ################# parmetis 4.x.x, 32-bit integer ########################### PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = # Define all the libraries LIBS = $(DSUPERLULIB) $(PARMETISLIB) $(METISLIB) $(BLASLIB) $(FLIBS) # Include directories for header files INCS = ${I_PARMETIS} # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = mpicc # CFLAGS should be set to be the C flags that include optimization CFLAGS = ${CUDA_FLAGS} ${INCS} -std=c99 -O3 -Wall -w2 -openmp -mkl \ -DDEBUGlevel=0 -DPRNTlevel=1 -DPROFlevel=0 # -Wunused-variable #XSDK_INDEX_SIZE = 64 ## 64-bit integer # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 # Add more flags to use GPU ifeq "${ACC}" "GPU" CFLAGS += -DGPU_ACC INCS += -I/usr/common/usg/cuda/5.5/include LIBS += -L/usr/common/usg/cuda/5.5/lib64 -lcublas -lcudart endif ############################################################################ # FORTRAN compiler setup FORTRAN = mpif90 F90FLAGS = -fast -Mnomain ############################################################################ LOADER = mpicc LOADOPTS = -openmp #-Mnomain ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. 
# CDEFS = -DNoChange SuperLU_DIST_5.3.0/MAKE_INC/make.xt4_pgi0000644013363400111340000000503313233431301016260 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _xt4 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = -lpgf90 -lpgf90_rpm1 -lpgf902 -lpgf90rtl -lpgftnrtl # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = cc # CFLAGS should be set to be the C flags that include optimization CFLAGS = -fastsse $(I_PARMETIS) -DDEBUGlevel=0 -DPRNTlevel=1 -DPROFlevel=0 # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 ############################################################################ # FORTRAN compiler setup FORTRAN = ftn F90FLAGS = -fastsse ############################################################################ LOADER = cc LOADOPTS = ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. # CDEFS = -DAdd_ SuperLU_DIST_5.3.0/MAKE_INC/make.sp.64bit0000644013363400111340000000526113233431301016256 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _power5 # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = -lessl ############################################################################ ## parmetis 4.x.x, 32-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := 
-L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = # Define all the libraries LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. # ARCH = ar # 64-bit: ARCHFLAGS = -X64 cr RANLIB = ranlib ############################################################################ # C compiler setup # 64-bit CC = mpcc_r # CFLAGS should be set to be the C flags that include optimization CFLAGS = -D_SP -qarch=pwr5 -qalias=allptrs -q64 \ -DDEBUGlevel=0 -DPRNTlevel=0 -O3 $(I_PARMETIS) # # NOOPTS should be set to be the C flags that turn off any optimization # 64-bit NOOPTS = -q64 ############################################################################ # FORTRAN compiler setup # 64-bit FORTRAN = mpxlf90_r F90FLAGS = -WF,-Dsp -O3 -Q -qstrict -qfixed -qinit=f90ptr -qarch=pwr5\ -q64 #-qintsize=8 ############################################################################ # 64-bit LOADER = mpxlf90_r # 64-bit: LOADOPTS = -q64 ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need follow the convention of how C calls a Fortran routine. 
# CDEFS = -DNoChange SuperLU_DIST_5.3.0/MAKE_INC/make.carver0000644013363400111340000000564313233431301016173 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1999 version alpha # # Modified: September 1, 1999 version 1.0 # March 15, 2003 version 2.0 # November 1, 2007 version 2.1 # September 1, 2011 version 3.0 # October 1, 2014 version 4.0 # ############################################################################ # # The machine (platform) identifier to append to the library names # Carver: Intel compiler PLAT = _sp # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = ${MKL} # ################# parmetis 4.x.x, 32-bit integer ########################### PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = # Define all the libraries LIBS = $(DSUPERLULIB) $(PARMETISLIB) $(METISLIB) $(BLASLIB) $(FLIBS) # Include directories for header files INCS = ${I_PARMETIS} # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = cr RANLIB = ranlib ############################################################################ # C compiler setup CC = mpicc # CFLAGS should be set to be the C flags that include optimization CFLAGS = ${CUDA_FLAGS} ${INCS} -std=c99 -O3 -Wall -w2 -openmp -mkl \ -DDEBUGlevel=0 -DPRNTlevel=1 -DPROFlevel=0 \ # -D_LONGINT # -Wunused-variable # # NOOPTS should be set to be the C flags that turn off any optimization NOOPTS = -O0 # Add more flags to use GPU ifeq "${ACC}" "GPU" CFLAGS += -DGPU_ACC INCS += -I/usr/common/usg/cuda/5.5/include LIBS += -L/usr/common/usg/cuda/5.5/lib64 -lcublas -lcudart endif ############################################################################ # FORTRAN compiler setup FORTRAN = mpif90 F90FLAGS = -fast -Mnomain ############################################################################ LOADER = mpicc LOADOPTS = -openmp #-Mnomain ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need to follow the convention of how C calls a Fortran routine. 
# CDEFS = -DNoChange SuperLU_DIST_5.3.0/MAKE_INC/make.origin0000644013363400111340000000521213233431301016170 0ustar xiaoyessg############################################################################ # # Program: SuperLU_DIST # # Module: make.inc # # Purpose: Top-level Definitions # # Creation date: February 4, 1997 version 1.0 # # Modified: November 11, 2002 (by Tom Oppe) # November 1, 2007 version 2.1 # ############################################################################ # # The machine (platform) identifier to append to the library names # PLAT = _sgi # # The name of the libraries to be created/linked to # DSuperLUroot = ${HOME}/Release_Codes/SuperLU_DIST_4.2 DSUPERLULIB = $(DSuperLUroot)/lib/libsuperlu_dist_4.2.a # BLASDEF = -DUSE_VENDOR_BLAS BLASLIB = -lscs MPILIB = -lmpi ############################################################################ ## parmetis 4.x.x, 32-bit integer PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3 ## parmetis 4.x.x, 64-bit integer # PARMETIS_DIR := ${HOME}/Carver/lib/parmetis-4.0.3_64 METISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libmetis -lmetis PARMETISLIB := -L${PARMETIS_DIR}/build/Linux-x86_64/libparmetis -lparmetis I_PARMETIS := -I${PARMETIS_DIR}/include -I${PARMETIS_DIR}/metis/include ############################################################################ # Define the required Fortran libraries, if you use C compiler to link FLIBS = -lfortran # Define all the libraries # LIBS = $(DSUPERLULIB) $(BLASLIB) $(PARMETISLIB) $(METISLIB) \ $(MPILIB) $(FLIBS) # # The archiver and the flag(s) to use when building archive (library) # If your system has no ranlib, set RANLIB = echo. 
# ARCH = ar ARCHFLAGS = crv RANLIB = touch ####################################################################### # C compiler setup CC = cc ISA = -64 -mips4 -TARG:platform=ip35 # CFLAGS should be set to be the C flags that include optimization CFLAGS = $(ISA) $(I_PARMETIS) -O2 # # NOOPTS should be set to be the C flags that turn off any optimization # This must be enforced to compile the two routines: slamch.c and dlamch.c. NOOPTS = $(ISA) -O0 ############################################################################ # FORTRAN compiler setup FORTRAN = f90 F90FLAGS = $(CFLAGS) ############################################################################ LOADER = cc LOADOPTS = $(CFLAGS) # ############################################################################ # C preprocessor defs for compilation (-DNoChange, -DAdd_, or -DUpCase) # # Need to follow the convention of how C calls a Fortran routine. # CDEFS = -DAdd_ -DORIGIN