Blender V2.61 - r43446

sgstrf.c

Go to the documentation of this file.
00001 
00005 /*
00006  * -- SuperLU routine (version 3.0) --
00007  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
00008  * and Lawrence Berkeley National Lab.
00009  * October 15, 2003
00010  *
00011  */
00012 /*
00013   Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
00014  
00015   THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
00016   EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
00017  
00018   Permission is hereby granted to use or copy this program for any
00019   purpose, provided the above notices are retained on all copies.
00020   Permission to modify the code and to distribute modified code is
00021   granted, provided the above notices are retained, and a notice that
00022   the code was modified is included with the above copyright notice.
00023 */
00024 
00025 #include "ssp_defs.h"
00026 
00027 void
00028 sgstrf (superlu_options_t *options, SuperMatrix *A,
00029         int relax, int panel_size, int *etree, void *work, int lwork,
00030         int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
00031         SuperLUStat_t *stat, int *info)
00032 {
00033 /*
00034  * Purpose
00035  * =======
00036  *
00037  * SGSTRF computes an LU factorization of a general sparse m-by-n
00038  * matrix A using partial pivoting with row interchanges.
00039  * The factorization has the form
00040  *     Pr * A = L * U
00041  * where Pr is a row permutation matrix, L is lower triangular with unit
00042  * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
00043  * triangular (upper trapezoidal if A->nrow < A->ncol).
00044  *
00045  * See supermatrix.h for the definition of 'SuperMatrix' structure.
00046  *
00047  * Arguments
00048  * =========
00049  *
00050  * options (input) superlu_options_t*
00051  *         The structure defines the input parameters to control
00052  *         how the LU decomposition will be performed.
00053  *
00054  * A        (input) SuperMatrix*
00055  *      Original matrix A, permuted by columns, of dimension
00056  *          (A->nrow, A->ncol). The type of A can be:
00057  *          Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
00058  *
00059  * drop_tol (input) float (NOT IMPLEMENTED)
00060  *      Drop tolerance parameter. At step j of the Gaussian elimination,
00061  *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
00062  *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
00063  *
00064  * relax    (input) int
00065  *          To control degree of relaxing supernodes. If the number
00066  *          of nodes (columns) in a subtree of the elimination tree is less
00067  *          than relax, this subtree is considered as one supernode,
00068  *          regardless of the row structures of those columns.
00069  *
00070  * panel_size (input) int
00071  *          A panel consists of at most panel_size consecutive columns.
00072  *
00073  * etree    (input) int*, dimension (A->ncol)
00074  *          Elimination tree of A'*A.
00075  *          Note: etree is a vector of parent pointers for a forest whose
00076  *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
00077  *          On input, the columns of A should be permuted so that the
00078  *          etree is in a certain postorder.
00079  *
00080  * work     (input/output) void*, size (lwork) (in bytes)
00081  *          User-supplied work space and space for the output data structures.
00082  *          Not referenced if lwork = 0;
00083  *
00084  * lwork   (input) int
00085  *         Specifies the size of work array in bytes.
00086  *         = 0:  allocate space internally by system malloc;
00087  *         > 0:  use user-supplied work array of length lwork in bytes,
00088  *               returns error if space runs out.
00089  *         = -1: the routine guesses the amount of space needed without
00090  *               performing the factorization, and returns it in
00091  *               *info; no other side effects.
00092  *
00093  * perm_c   (input) int*, dimension (A->ncol)
00094  *      Column permutation vector, which defines the 
00095  *          permutation matrix Pc; perm_c[i] = j means column i of A is 
00096  *          in position j in A*Pc.
00097  *          When searching for diagonal, perm_c[*] is applied to the
00098  *          row subscripts of A, so that diagonal threshold pivoting
00099  *          can find the diagonal of A, rather than that of A*Pc.
00100  *
00101  * perm_r   (input/output) int*, dimension (A->nrow)
00102  *          Row permutation vector which defines the permutation matrix Pr,
00103  *          perm_r[i] = j means row i of A is in position j in Pr*A.
00104  *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
00105  *             will try to use the input perm_r, unless a certain threshold
00106  *             criterion is violated. In that case, perm_r is overwritten by
00107  *             a new permutation determined by partial pivoting or diagonal
00108  *             threshold pivoting.
00109  *          Otherwise, perm_r is output argument;
00110  *
00111  * L        (output) SuperMatrix*
00112  *          The factor L from the factorization Pr*A=L*U; use compressed row 
00113  *          subscripts storage for supernodes, i.e., L has type: 
00114  *          Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
00115  *
00116  * U        (output) SuperMatrix*
00117  *      The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
00118  *          storage scheme, i.e., U has types: Stype = SLU_NC, 
00119  *          Dtype = SLU_S, Mtype = SLU_TRU.
00120  *
00121  * stat     (output) SuperLUStat_t*
00122  *          Record the statistics on runtime and floating-point operation count.
00123  *          See util.h for the definition of 'SuperLUStat_t'.
00124  *
00125  * info     (output) int*
00126  *          = 0: successful exit
00127  *          < 0: if info = -i, the i-th argument had an illegal value
00128  *          > 0: if info = i, and i is
00129  *             <= A->ncol: U(i,i) is exactly zero. The factorization has
00130  *                been completed, but the factor U is exactly singular,
00131  *                and division by zero will occur if it is used to solve a
00132  *                system of equations.
00133  *             > A->ncol: number of bytes allocated when memory allocation
00134  *                failure occurred, plus A->ncol. If lwork = -1, it is
00135  *                the estimated amount of space needed, plus A->ncol.
00136  *
00137  * ======================================================================
00138  *
00139  * Local Working Arrays: 
00140  * ======================
00141  *   m = number of rows in the matrix
00142  *   n = number of columns in the matrix
00143  *
00144  *   xprune[0:n-1]: xprune[*] points to locations in subscript 
00145  *  vector lsub[*]. For column i, xprune[i] denotes the point where 
00146  *  structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
00147  *  to be traversed for symbolic factorization.
00148  *
00149  *   marker[0:3*m-1]: marker[i] = j means that node i has been 
00150  *  reached when working on column j.
00151  *  Storage: relative to original row subscripts
00152  *  NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
00153  *        see spanel_dfs.c; marker2 is used for inner-factorization,
00154  *            see scolumn_dfs.c.
00155  *
00156  *   parent[0:m-1]: parent vector used during dfs
00157  *      Storage: relative to new row subscripts
00158  *
00159  *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
00160  *  unexplored neighbor of i in lsub[*]
00161  *
00162  *   segrep[0:nseg-1]: contains the list of supernodal representatives
00163  *  in topological order of the dfs. A supernode representative is the 
00164  *  last column of a supernode.
00165  *      The maximum size of segrep[] is n.
00166  *
00167  *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
00168  *  supernodal representative r, repfnz[r] is the location of the first 
00169  *  nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
00170  *  indicates the supernode r has been explored.
00171  *  NOTE: There are W of them, each used for one column of a panel. 
00172  *
00173  *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
00174  *      the panel diagonal. These are filled in during spanel_dfs(), and are
00175  *      used later in the inner LU factorization within the panel.
00176  *  panel_lsub[]/dense[] pair forms the SPA data structure.
00177  *  NOTE: There are W of them.
00178  *
00179  *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
00180  *             NOTE: there are W of them.
00181  *
00182  *   tempv[0:*]: real temporary used for dense numeric kernels;
00183  *  The size of this array is defined by NUM_TEMPV() in ssp_defs.h.
00184  *
00185  */
00186     /* Local working arrays */
00187     NCPformat *Astore;
00188     int       *iperm_r = NULL; /* inverse of perm_r;
00189                used when options->Fact == SamePattern_SameRowPerm */
00190     int       *iperm_c; /* inverse of perm_c */
00191     int       *iwork;
00192     float    *swork;
00193     int       *segrep, *repfnz, *parent, *xplore;
00194     int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
00195     int       *xprune;
00196     int       *marker;
00197     float    *dense, *tempv;
00198     int       *relax_end;
00199     float    *a;
00200     int       *asub;
00201     int       *xa_begin, *xa_end;
00202     int       *xsup, *supno;
00203     int       *xlsub, *xlusup, *xusub;
00204     int       nzlumax;
00205     static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
00206 
00207     /* Local scalars */
00208     fact_t    fact = options->Fact;
00209     double    diag_pivot_thresh = options->DiagPivotThresh;
00210     int       pivrow;   /* pivotal row number in the original matrix A */
00211     int       nseg1;    /* no of segments in U-column above panel row jcol */
00212     int       nseg; /* no of segments in each U-column */
00213     register int jcol;  
00214     register int kcol;  /* end column of a relaxed snode */
00215     register int icol;
00216     register int i, k, jj, new_next, iinfo;
00217     int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
00218     int       w_def;    /* upper bound on panel width */
00219     int       usepr, iperm_r_allocated = 0;
00220     int       nnzL, nnzU;
00221     int       *panel_histo = stat->panel_histo;
00222     flops_t   *ops = stat->ops;
00223 
00224     iinfo    = 0;
00225     m        = A->nrow;
00226     n        = A->ncol;
00227     min_mn   = SUPERLU_MIN(m, n);
00228     Astore   = A->Store;
00229     a        = Astore->nzval;
00230     asub     = Astore->rowind;
00231     xa_begin = Astore->colbeg;
00232     xa_end   = Astore->colend;
00233 
00234     /* Allocate storage common to the factor routines */
00235     *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz,
00236                        panel_size, L, U, &Glu, &iwork, &swork);
00237     if ( *info ) return;
00238     
00239     xsup    = Glu.xsup;
00240     supno   = Glu.supno;
00241     xlsub   = Glu.xlsub;
00242     xlusup  = Glu.xlusup;
00243     xusub   = Glu.xusub;
00244     
00245     SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
00246          &repfnz, &panel_lsub, &xprune, &marker);
00247     sSetRWork(m, panel_size, swork, &dense, &tempv);
00248     
00249     usepr = (fact == SamePattern_SameRowPerm);
00250     if ( usepr ) {
00251     /* Compute the inverse of perm_r */
00252     iperm_r = (int *) intMalloc(m);
00253     for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
00254     iperm_r_allocated = 1;
00255     }
00256     iperm_c = (int *) intMalloc(n);
00257     for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
00258 
00259     /* Identify relaxed snodes */
00260     relax_end = (int *) intMalloc(n);
00261     if ( options->SymmetricMode == YES ) {
00262         heap_relax_snode(n, etree, relax, marker, relax_end); 
00263     } else {
00264         relax_snode(n, etree, relax, marker, relax_end); 
00265     }
00266     
00267     ifill (perm_r, m, EMPTY);
00268     ifill (marker, m * NO_MARKER, EMPTY);
00269     supno[0] = -1;
00270     xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
00271     w_def    = panel_size;
00272 
00273     /* 
00274      * Work on one "panel" at a time. A panel is one of the following: 
00275      *     (a) a relaxed supernode at the bottom of the etree, or
00276      *     (b) panel_size contiguous columns, defined by the user
00277      */
00278     for (jcol = 0; jcol < min_mn; ) {
00279 
00280     if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
00281         kcol = relax_end[jcol];   /* end of the relaxed snode */
00282         panel_histo[kcol-jcol+1]++;
00283 
00284         /* --------------------------------------
00285          * Factorize the relaxed supernode(jcol:kcol) 
00286          * -------------------------------------- */
00287         /* Determine the union of the row structure of the snode */
00288         if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
00289                     xprune, marker, &Glu)) != 0 ) {
00290         if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
00291         SUPERLU_FREE (iperm_c);
00292         SUPERLU_FREE (relax_end);
00293         return;
00294         }
00295 
00296             nextu    = xusub[jcol];
00297         nextlu   = xlusup[jcol];
00298         jsupno   = supno[jcol];
00299         fsupc    = xsup[jsupno];
00300         new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
00301         nzlumax = Glu.nzlumax;
00302         while ( new_next > nzlumax ) {
00303         if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) ) {
00304             if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
00305             SUPERLU_FREE (iperm_c);
00306             SUPERLU_FREE (relax_end);
00307             return;
00308         }
00309         }
00310     
00311         for (icol = jcol; icol<= kcol; icol++) {
00312         xusub[icol+1] = nextu;
00313         
00314             /* Scatter into SPA dense[*] */
00315             for (k = xa_begin[icol]; k < xa_end[icol]; k++)
00316                 dense[asub[k]] = a[k];
00317 
00318             /* Numeric update within the snode */
00319             ssnode_bmod(icol, fsupc, dense, tempv, &Glu, stat);
00320 
00321         if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r,
00322                       iperm_r, iperm_c, &pivrow, &Glu, stat)) )
00323             if ( iinfo == 0 ) iinfo = *info;
00324         
00325 #ifdef DEBUG
00326         sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
00327 #endif
00328 
00329         }
00330 
00331         jcol = icol;
00332 
00333     } else { /* Work on one panel of panel_size columns */
00334         
00335         /* Adjust panel_size so that a panel won't overlap with the next 
00336          * relaxed snode.
00337          */
00338         panel_size = w_def;
00339         for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
00340         if ( relax_end[k] != EMPTY ) {
00341             panel_size = k - jcol;
00342             break;
00343         }
00344         if ( k == min_mn ) panel_size = min_mn - jcol;
00345         panel_histo[panel_size]++;
00346 
00347         /* symbolic factor on a panel of columns */
00348         spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
00349               dense, panel_lsub, segrep, repfnz, xprune,
00350               marker, parent, xplore, &Glu);
00351         
00352         /* numeric sup-panel updates in topological order */
00353         spanel_bmod(m, panel_size, jcol, nseg1, dense,
00354                 tempv, segrep, repfnz, &Glu, stat);
00355         
00356         /* Sparse LU within the panel, and below panel diagonal */
00357             for ( jj = jcol; jj < jcol + panel_size; jj++) {
00358         k = (jj - jcol) * m; /* column index for w-wide arrays */
00359 
00360         nseg = nseg1;   /* Begin after all the panel segments */
00361 
00362             if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
00363                     segrep, &repfnz[k], xprune, marker,
00364                     parent, xplore, &Glu)) != 0) {
00365             if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
00366             SUPERLU_FREE (iperm_c);
00367             SUPERLU_FREE (relax_end);
00368             return;
00369         }
00370 
00371             /* Numeric updates */
00372             if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
00373                      tempv, &segrep[nseg1], &repfnz[k],
00374                      jcol, &Glu, stat)) != 0) {
00375             if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
00376             SUPERLU_FREE (iperm_c);
00377             SUPERLU_FREE (relax_end);
00378             return;
00379         }
00380         
00381             /* Copy the U-segments to ucol[*] */
00382         if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
00383                       perm_r, &dense[k], &Glu)) != 0) {
00384             if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
00385             SUPERLU_FREE (iperm_c);
00386             SUPERLU_FREE (relax_end);
00387             return;
00388         }
00389 
00390             if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
00391                       iperm_r, iperm_c, &pivrow, &Glu, stat)) )
00392             if ( iinfo == 0 ) iinfo = *info;
00393 
00394         /* Prune columns (0:jj-1) using column jj */
00395             spruneL(jj, perm_r, pivrow, nseg, segrep,
00396                         &repfnz[k], xprune, &Glu);
00397 
00398         /* Reset repfnz[] for this column */
00399             resetrep_col (nseg, segrep, &repfnz[k]);
00400         
00401 #ifdef DEBUG
00402         sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
00403 #endif
00404 
00405         }
00406 
00407         jcol += panel_size; /* Move to the next panel */
00408 
00409     } /* else */
00410 
00411     } /* for */
00412 
00413     *info = iinfo;
00414     
00415     if ( m > n ) {
00416     k = 0;
00417         for (i = 0; i < m; ++i) 
00418             if ( perm_r[i] == EMPTY ) {
00419             perm_r[i] = n + k;
00420         ++k;
00421         }
00422     }
00423 
00424     countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
00425     fixupL(min_mn, perm_r, &Glu);
00426 
00427     sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
00428 
00429     if ( fact == SamePattern_SameRowPerm ) {
00430         /* L and U structures may have changed due to possibly different
00431        pivoting, even though the storage is available.
00432        There could also be memory expansions, so the array locations
00433            may have changed, */
00434         ((SCformat *)L->Store)->nnz = nnzL;
00435     ((SCformat *)L->Store)->nsuper = Glu.supno[n];
00436     ((SCformat *)L->Store)->nzval = Glu.lusup;
00437     ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
00438     ((SCformat *)L->Store)->rowind = Glu.lsub;
00439     ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
00440     ((NCformat *)U->Store)->nnz = nnzU;
00441     ((NCformat *)U->Store)->nzval = Glu.ucol;
00442     ((NCformat *)U->Store)->rowind = Glu.usub;
00443     ((NCformat *)U->Store)->colptr = Glu.xusub;
00444     } else {
00445         sCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup, 
00446                              Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
00447                      Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
00448         sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
00449                    Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
00450     }
00451     
00452     ops[FACT] += ops[TRSV] + ops[GEMV]; 
00453     
00454     if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
00455     SUPERLU_FREE (iperm_c);
00456     SUPERLU_FREE (relax_end);
00457 }