Blender: ssnode_bmod.c Source File

Blender V2.61 - r43446
00001 
00005 /*
00006  * -- SuperLU routine (version 3.0) --
00007  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
00008  * and Lawrence Berkeley National Lab.
00009  * October 15, 2003
00010  *
00011  */
00012 /*
00013   Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
00014  
00015   THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
00016   EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
00017  
00018   Permission is hereby granted to use or copy this program for any
00019   purpose, provided the above notices are retained on all copies.
00020   Permission to modify the code and to distribute modified code is
00021   granted, provided the above notices are retained, and a notice that
00022   the code was modified is included with the above copyright notice.
00023 */
00024 
00025 #include "ssp_defs.h"
00026 
/*
 * Dense kernels used by the non-vendor-BLAS code path; both are defined
 * elsewhere in the library.  The parameter names below reflect how this
 * file calls them (leading dimension, dimensions, matrix, vectors) --
 * NOTE(review): confirm against the actual definitions.
 */
void slsolve(int ldm, int ncol, float *M, float *rhs);
void smatvec(int ldm, int nrow, int ncol, float *M, float *vec, float *Mxvec);
00029 
00030 /*
00031  * Performs numeric block updates within the relaxed snode. 
00032  */
00033 int
00034 ssnode_bmod (
00035         const int  jcol,      /* in */
00036         const int  fsupc,     /* in */
00037         float     *dense,    /* in */
00038         float     *tempv,    /* working array */
00039         GlobalLU_t *Glu,      /* modified */
00040         SuperLUStat_t *stat   /* output */
00041         )
00042 {
00043 #ifdef USE_VENDOR_BLAS
00044 #ifdef _CRAY
00045     _fcd ftcs1 = _cptofcd("L", strlen("L")),
00046      ftcs2 = _cptofcd("N", strlen("N")),
00047      ftcs3 = _cptofcd("U", strlen("U"));
00048 #endif
00049     int            incx = 1, incy = 1;
00050     float         alpha = -1.0, beta = 1.0;
00051 #endif
00052 
00053     int            luptr, nsupc, nsupr, nrow;
00054     int            isub, irow, i, iptr; 
00055     register int   ufirst, nextlu;
00056     int            *lsub, *xlsub;
00057     float         *lusup;
00058     int            *xlusup;
00059     flops_t *ops = stat->ops;
00060 
00061     lsub    = Glu->lsub;
00062     xlsub   = Glu->xlsub;
00063     lusup   = Glu->lusup;
00064     xlusup  = Glu->xlusup;
00065 
00066     nextlu = xlusup[jcol];
00067     
00068     /*
00069      *  Process the supernodal portion of L\U[*,j]
00070      */
00071     for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
00072     irow = lsub[isub];
00073     lusup[nextlu] = dense[irow];
00074     dense[irow] = 0;
00075     ++nextlu;
00076     }
00077 
00078     xlusup[jcol + 1] = nextlu;  /* Initialize xlusup for next column */
00079     
00080     if ( fsupc < jcol ) {
00081 
00082     luptr = xlusup[fsupc];
00083     nsupr = xlsub[fsupc+1] - xlsub[fsupc];
00084     nsupc = jcol - fsupc;   /* Excluding jcol */
00085     ufirst = xlusup[jcol];  /* Points to the beginning of column
00086                    jcol in supernode L\U(jsupno). */
00087     nrow = nsupr - nsupc;
00088 
00089     ops[TRSV] += nsupc * (nsupc - 1);
00090     ops[GEMV] += 2 * nrow * nsupc;
00091 
00092 #ifdef USE_VENDOR_BLAS
00093 #ifdef _CRAY
00094     STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
00095           &lusup[ufirst], &incx );
00096     SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
00097         &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
00098 #else
00099     strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
00100           &lusup[ufirst], &incx );
00101     sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
00102         &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
00103 #endif
00104 #else
00105     slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
00106     smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
00107             &lusup[ufirst], &tempv[0] );
00108 
00109         /* Scatter tempv[*] into lusup[*] */
00110     iptr = ufirst + nsupc;
00111     for (i = 0; i < nrow; i++) {
00112         lusup[iptr++] -= tempv[i];
00113         tempv[i] = 0.0;
00114     }
00115 #endif
00116 
00117     }
00118 
00119     return 0;
00120 }