Blender: scolumn_bmod.c Source File

Blender V2.61 - r43446
00001 
00005 /*
00006  * -- SuperLU routine (version 3.0) --
00007  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
00008  * and Lawrence Berkeley National Lab.
00009  * October 15, 2003
00010  *
00011  */
00012 /*
00013   Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
00014  
00015   THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
00016   EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
00017  
00018   Permission is hereby granted to use or copy this program for any
00019   purpose, provided the above notices are retained on all copies.
00020   Permission to modify the code and to distribute modified code is
00021   granted, provided the above notices are retained, and a notice that
00022   the code was modified is included with the above copyright notice.
00023 */
00024 
00025 #include <stdio.h>
00026 #include <stdlib.h>
00027 #include "ssp_defs.h"
00028 
00029 /* 
00030  * Function prototypes 
00031  */
00032 void susolve(int, int, float*, float*);
00033 void slsolve(int, int, float*, float*);
00034 void smatvec(int, int, int, float*, float*, float*);
00035 
00036 
00037 
00038 /* Return value:   0 - successful return
00039  *               > 0 - number of bytes allocated when run out of space
00040  */
00041 int
00042 scolumn_bmod (
00043          const int  jcol,     /* in */
00044          const int  nseg,     /* in */
00045          float     *dense,    /* in */
00046          float     *tempv,    /* working array */
00047          int        *segrep,  /* in */
00048          int        *repfnz,  /* in */
00049          int        fpanelc,  /* in -- first column in the current panel */
00050          GlobalLU_t *Glu,     /* modified */
00051          SuperLUStat_t *stat  /* output */
00052          )
00053 {
00054 /*
00055  * Purpose:
00056  * ========
00057  *    Performs numeric block updates (sup-col) in topological order.
00058  *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
00059  *    Special processing on the supernodal portion of L\U[*,j]
00060  *
00061  */
00062 #ifdef _CRAY
00063     _fcd ftcs1 = _cptofcd("L", strlen("L")),
00064          ftcs2 = _cptofcd("N", strlen("N")),
00065          ftcs3 = _cptofcd("U", strlen("U"));
00066 #endif
00067 
00068 #ifdef USE_VENDOR_BLAS
00069     int         incx = 1, incy = 1;
00070     float      alpha, beta;
00071 #endif
00072     
00073     /* krep = representative of current k-th supernode
00074      * fsupc = first supernodal column
00075      * nsupc = no of columns in supernode
00076      * nsupr = no of rows in supernode (used as leading dimension)
00077      * luptr = location of supernodal LU-block in storage
00078      * kfnz = first nonz in the k-th supernodal segment
00079      * no_zeros = no of leading zeros in a supernodal U-segment
00080      */
00081     float       ukj, ukj1, ukj2;
00082     int          luptr, luptr1, luptr2;
00083     int          fsupc, nsupc, nsupr, segsze;
00084     int          nrow;    /* No of rows in the matrix of matrix-vector */
00085     int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
00086     register int lptr, kfnz, isub, irow, i;
00087     register int no_zeros, new_next; 
00088     int          ufirst, nextlu;
00089     int          fst_col; /* First column within small LU update */
00090     int          d_fsupc; /* Distance between the first column of the current
00091                  panel and the first column of the current snode. */
00092     int          *xsup, *supno;
00093     int          *lsub, *xlsub;
00094     float       *lusup;
00095     int          *xlusup;
00096     int          nzlumax;
00097     float       *tempv1;
00098     float      zero = 0.0;
00099 #ifdef USE_VENDOR_BLAS
00100     float      one = 1.0;
00101     float      none = -1.0;
00102 #endif
00103     int          mem_error;
00104     flops_t      *ops = stat->ops;
00105 
00106     xsup    = Glu->xsup;
00107     supno   = Glu->supno;
00108     lsub    = Glu->lsub;
00109     xlsub   = Glu->xlsub;
00110     lusup   = Glu->lusup;
00111     xlusup  = Glu->xlusup;
00112     nzlumax = Glu->nzlumax;
00113     jcolp1 = jcol + 1;
00114     jsupno = supno[jcol];
00115     
00116     /* 
00117      * For each nonz supernode segment of U[*,j] in topological order 
00118      */
00119     k = nseg - 1;
00120     for (ksub = 0; ksub < nseg; ksub++) {
00121 
00122     krep = segrep[k];
00123     k--;
00124     ksupno = supno[krep];
00125     if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
00126 
00127         fsupc = xsup[ksupno];
00128         fst_col = SUPERLU_MAX ( fsupc, fpanelc );
00129 
00130         /* Distance from the current supernode to the current panel; 
00131            d_fsupc=0 if fsupc > fpanelc. */
00132         d_fsupc = fst_col - fsupc; 
00133 
00134         luptr = xlusup[fst_col] + d_fsupc;
00135         lptr = xlsub[fsupc] + d_fsupc;
00136 
00137         kfnz = repfnz[krep];
00138         kfnz = SUPERLU_MAX ( kfnz, fpanelc );
00139 
00140         segsze = krep - kfnz + 1;
00141         nsupc = krep - fst_col + 1;
00142         nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
00143         nrow = nsupr - d_fsupc - nsupc;
00144         krep_ind = lptr + nsupc - 1;
00145 
00146         ops[TRSV] += segsze * (segsze - 1);
00147         ops[GEMV] += 2 * nrow * segsze;
00148 
00149 
00150         /* 
00151          * Case 1: Update U-segment of size 1 -- col-col update 
00152          */
00153         if ( segsze == 1 ) {
00154         ukj = dense[lsub[krep_ind]];
00155         luptr += nsupr*(nsupc-1) + nsupc;
00156 
00157         for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
00158             irow = lsub[i];
00159             dense[irow] -=  ukj*lusup[luptr];
00160             luptr++;
00161         }
00162 
00163         } else if ( segsze <= 3 ) {
00164         ukj = dense[lsub[krep_ind]];
00165         luptr += nsupr*(nsupc-1) + nsupc-1;
00166         ukj1 = dense[lsub[krep_ind - 1]];
00167         luptr1 = luptr - nsupr;
00168 
00169         if ( segsze == 2 ) { /* Case 2: 2cols-col update */
00170             ukj -= ukj1 * lusup[luptr1];
00171             dense[lsub[krep_ind]] = ukj;
00172             for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
00173                 irow = lsub[i];
00174                 luptr++;
00175                 luptr1++;
00176                 dense[irow] -= ( ukj*lusup[luptr]
00177                     + ukj1*lusup[luptr1] );
00178             }
00179         } else { /* Case 3: 3cols-col update */
00180             ukj2 = dense[lsub[krep_ind - 2]];
00181             luptr2 = luptr1 - nsupr;
00182             ukj1 -= ukj2 * lusup[luptr2-1];
00183             ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
00184             dense[lsub[krep_ind]] = ukj;
00185             dense[lsub[krep_ind-1]] = ukj1;
00186             for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
00187                 irow = lsub[i];
00188                 luptr++;
00189                 luptr1++;
00190             luptr2++;
00191                 dense[irow] -= ( ukj*lusup[luptr]
00192                  + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
00193             }
00194         }
00195 
00196 
00197 
00198         } else {
00199         /*
00200          * Case: sup-col update
00201          * Perform a triangular solve and block update,
00202          * then scatter the result of sup-col update to dense
00203          */
00204 
00205         no_zeros = kfnz - fst_col;
00206 
00207             /* Copy U[*,j] segment from dense[*] to tempv[*] */
00208             isub = lptr + no_zeros;
00209             for (i = 0; i < segsze; i++) {
00210             irow = lsub[isub];
00211             tempv[i] = dense[irow];
00212             ++isub; 
00213             }
00214 
00215             /* Dense triangular solve -- start effective triangle */
00216         luptr += nsupr * no_zeros + no_zeros; 
00217         
00218 #ifdef USE_VENDOR_BLAS
00219 #ifdef _CRAY
00220         STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
00221                &nsupr, tempv, &incx );
00222 #else       
00223         strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
00224                &nsupr, tempv, &incx );
00225 #endif      
00226         luptr += segsze;  /* Dense matrix-vector */
00227         tempv1 = &tempv[segsze];
00228                 alpha = one;
00229                 beta = zero;
00230 #ifdef _CRAY
00231         SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
00232                &nsupr, tempv, &incx, &beta, tempv1, &incy );
00233 #else
00234         sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
00235                &nsupr, tempv, &incx, &beta, tempv1, &incy );
00236 #endif
00237 #else
00238         slsolve ( nsupr, segsze, &lusup[luptr], tempv );
00239 
00240         luptr += segsze;  /* Dense matrix-vector */
00241         tempv1 = &tempv[segsze];
00242         smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
00243 #endif
00244         
00245         
00246                 /* Scatter tempv[] into SPA dense[] as a temporary storage */
00247                 isub = lptr + no_zeros;
00248                 for (i = 0; i < segsze; i++) {
00249                     irow = lsub[isub];
00250                     dense[irow] = tempv[i];
00251                     tempv[i] = zero;
00252                     ++isub;
00253                 }
00254 
00255         /* Scatter tempv1[] into SPA dense[] */
00256         for (i = 0; i < nrow; i++) {
00257             irow = lsub[isub];
00258             dense[irow] -= tempv1[i];
00259             tempv1[i] = zero;
00260             ++isub;
00261         }
00262         }
00263         
00264     } /* if jsupno ... */
00265 
00266     } /* for each segment... */
00267 
00268     /*
00269      *  Process the supernodal portion of L\U[*,j]
00270      */
00271     nextlu = xlusup[jcol];
00272     fsupc = xsup[jsupno];
00273 
00274     /* Copy the SPA dense into L\U[*,j] */
00275     new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
00276     while ( new_next > nzlumax ) {
00277     if ((mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
00278         return (mem_error);
00279     lusup = Glu->lusup;
00280     lsub = Glu->lsub;
00281     }
00282 
00283     for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
00284     irow = lsub[isub];
00285     lusup[nextlu] = dense[irow];
00286         dense[irow] = zero;
00287     ++nextlu;
00288     }
00289 
00290     xlusup[jcolp1] = nextlu;    /* Close L\U[*,jcol] */
00291 
00292     /* For more updates within the panel (also within the current supernode), 
00293      * should start from the first column of the panel, or the first column 
00294      * of the supernode, whichever is bigger. There are 2 cases:
00295      *    1) fsupc < fpanelc, then fst_col := fpanelc
00296      *    2) fsupc >= fpanelc, then fst_col := fsupc
00297      */
00298     fst_col = SUPERLU_MAX ( fsupc, fpanelc );
00299 
00300     if ( fst_col < jcol ) {
00301 
00302     /* Distance between the current supernode and the current panel.
00303        d_fsupc=0 if fsupc >= fpanelc. */
00304     d_fsupc = fst_col - fsupc;
00305 
00306     luptr = xlusup[fst_col] + d_fsupc;
00307     nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
00308     nsupc = jcol - fst_col; /* Excluding jcol */
00309     nrow = nsupr - d_fsupc - nsupc;
00310 
00311     /* Points to the beginning of jcol in snode L\U(jsupno) */
00312     ufirst = xlusup[jcol] + d_fsupc;    
00313 
00314     ops[TRSV] += nsupc * (nsupc - 1);
00315     ops[GEMV] += 2 * nrow * nsupc;
00316     
00317 #ifdef USE_VENDOR_BLAS
00318 #ifdef _CRAY
00319     STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
00320            &nsupr, &lusup[ufirst], &incx );
00321 #else
00322     strsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
00323            &nsupr, &lusup[ufirst], &incx );
00324 #endif
00325     
00326     alpha = none; beta = one; /* y := beta*y + alpha*A*x */
00327 
00328 #ifdef _CRAY
00329     SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
00330            &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
00331 #else
00332     sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
00333            &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
00334 #endif
00335 #else
00336     slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
00337 
00338     smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
00339         &lusup[ufirst], tempv );
00340     
00341         /* Copy updates from tempv[*] into lusup[*] */
00342     isub = ufirst + nsupc;
00343     for (i = 0; i < nrow; i++) {
00344         lusup[isub] -= tempv[i];
00345         tempv[i] = 0.0;
00346         ++isub;
00347     }
00348 
00349 #endif
00350     
00351     
00352     } /* if fst_col < jcol ... */ 
00353 
00354     return 0;
00355 }