Blender: util.c Source File

Blender V2.61 - r43446
00001 
00004 /*
00005  * -- SuperLU routine (version 3.0) --
00006  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
00007  * and Lawrence Berkeley National Lab.
00008  * October 15, 2003
00009  *
00010  */
00011 /*
00012   Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
00013  
00014   THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
00015   EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
00016  
00017   Permission is hereby granted to use or copy this program for any
00018   purpose, provided the above notices are retained on all copies.
00019   Permission to modify the code and to distribute modified code is
00020   granted, provided the above notices are retained, and a notice that
00021   the code was modified is included with the above copyright notice.
00022 */
00023 
00024 #include <math.h>
00025 #include "ssp_defs.h"
00026 #include "util.h"
00027 
00028 /* prototypes */
00029 flops_t LUFactFlops(SuperLUStat_t *stat);
00030 flops_t LUSolveFlops(SuperLUStat_t *stat);
00031 float SpaSize(int n, int np, float sum_npw);
00032 float DenseSize(int n, float sum_nw);
00033 
00034 /* 
00035  * Global statistics variale
00036  */
00037 
00038 void superlu_abort_and_exit(char* msg)
00039 {
00040     fprintf(stderr, "%s", msg);
00041     exit (-1);
00042 }
00043 
00044 /*
00045  * Set the default values for the options argument.
00046  */
00047 void set_default_options(superlu_options_t *options)
00048 {
00049     options->Fact = DOFACT;
00050     options->Equil = YES;
00051     options->ColPerm = COLAMD;
00052     options->DiagPivotThresh = 1.0;
00053     options->Trans = NOTRANS;
00054     options->IterRefine = NOREFINE;
00055     options->SymmetricMode = NO;
00056     options->PivotGrowth = NO;
00057     options->ConditionNumber = NO;
00058     options->PrintStat = YES;
00059 }
00060 
00061 /* Deallocate the structure pointing to the actual storage of the matrix. */
00062 void
00063 Destroy_SuperMatrix_Store(SuperMatrix *A)
00064 {
00065     SUPERLU_FREE ( A->Store );
00066 }
00067 
00068 void
00069 Destroy_CompCol_Matrix(SuperMatrix *A)
00070 {
00071     SUPERLU_FREE( ((NCformat *)A->Store)->rowind );
00072     SUPERLU_FREE( ((NCformat *)A->Store)->colptr );
00073     SUPERLU_FREE( ((NCformat *)A->Store)->nzval );
00074     SUPERLU_FREE( A->Store );
00075 }
00076 
00077 void
00078 Destroy_CompRow_Matrix(SuperMatrix *A)
00079 {
00080     SUPERLU_FREE( ((NRformat *)A->Store)->colind );
00081     SUPERLU_FREE( ((NRformat *)A->Store)->rowptr );
00082     SUPERLU_FREE( ((NRformat *)A->Store)->nzval );
00083     SUPERLU_FREE( A->Store );
00084 }
00085 
00086 void
00087 Destroy_SuperNode_Matrix(SuperMatrix *A)
00088 {
00089     SUPERLU_FREE ( ((SCformat *)A->Store)->rowind );
00090     SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr );
00091     SUPERLU_FREE ( ((SCformat *)A->Store)->nzval );
00092     SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr );
00093     SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup );
00094     SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col );
00095     SUPERLU_FREE ( A->Store );
00096 }
00097 
00098 /* A is of type Stype==NCP */
00099 void
00100 Destroy_CompCol_Permuted(SuperMatrix *A)
00101 {
00102     SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg );
00103     SUPERLU_FREE ( ((NCPformat *)A->Store)->colend );
00104     SUPERLU_FREE ( A->Store );
00105 }
00106 
00107 /* A is of type Stype==DN */
00108 void
00109 Destroy_Dense_Matrix(SuperMatrix *A)
00110 {
00111     DNformat* Astore = A->Store;
00112     SUPERLU_FREE (Astore->nzval);
00113     SUPERLU_FREE ( A->Store );
00114 }
00115 
00116 /*
00117  * Reset repfnz[] for the current column 
00118  */
00119 void
00120 resetrep_col (const int nseg, const int *segrep, int *repfnz)
00121 {
00122     int i, irep;
00123     
00124     for (i = 0; i < nseg; i++) {
00125     irep = segrep[i];
00126     repfnz[irep] = EMPTY;
00127     }
00128 }
00129 
00130 
00131 /*
00132  * Count the total number of nonzeros in factors L and U,  and in the 
00133  * symmetrically reduced L. 
00134  */
00135 void
00136 countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
00137 {
00138     int          nsuper, fsupc, i, j;
00139     int          nnzL0, jlen, irep;
00140     int          *xsup, *xlsub;
00141 
00142     xsup   = Glu->xsup;
00143     xlsub  = Glu->xlsub;
00144     *nnzL  = 0;
00145     *nnzU  = (Glu->xusub)[n];
00146     nnzL0  = 0;
00147     nsuper = (Glu->supno)[n];
00148 
00149     if ( n <= 0 ) return;
00150 
00151     /* 
00152      * For each supernode
00153      */
00154     for (i = 0; i <= nsuper; i++) {
00155     fsupc = xsup[i];
00156     jlen = xlsub[fsupc+1] - xlsub[fsupc];
00157 
00158     for (j = fsupc; j < xsup[i+1]; j++) {
00159         *nnzL += jlen;
00160         *nnzU += j - fsupc + 1;
00161         jlen--;
00162     }
00163     irep = xsup[i+1] - 1;
00164     nnzL0 += xprune[irep] - xlsub[irep];
00165     }
00166     
00167     /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/
00168 }
00169 
00170 
00171 
00172 /*
00173  * Fix up the data storage lsub for L-subscripts. It removes the subscript
00174  * sets for structural pruning, and applies permuation to the remaining
00175  * subscripts.
00176  */
00177 void
00178 fixupL(const int n, const int *perm_r, GlobalLU_t *Glu)
00179 {
00180     register int nsuper, fsupc, nextl, i, j, k, jstrt;
00181     int          *xsup, *lsub, *xlsub;
00182 
00183     if ( n <= 1 ) return;
00184 
00185     xsup   = Glu->xsup;
00186     lsub   = Glu->lsub;
00187     xlsub  = Glu->xlsub;
00188     nextl  = 0;
00189     nsuper = (Glu->supno)[n];
00190     
00191     /* 
00192      * For each supernode ...
00193      */
00194     for (i = 0; i <= nsuper; i++) {
00195     fsupc = xsup[i];
00196     jstrt = xlsub[fsupc];
00197     xlsub[fsupc] = nextl;
00198     for (j = jstrt; j < xlsub[fsupc+1]; j++) {
00199         lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */
00200         nextl++;
00201     }
00202     for (k = fsupc+1; k < xsup[i+1]; k++) 
00203             xlsub[k] = nextl;   /* Other columns in supernode i */
00204 
00205     }
00206 
00207     xlsub[n] = nextl;
00208 }
00209 
00210 
00211 /*
00212  * Diagnostic print of segment info after panel_dfs().
00213  */
00214 void print_panel_seg(int n, int w, int jcol, int nseg, 
00215              int *segrep, int *repfnz)
00216 {
00217     int j, k;
00218     
00219     for (j = jcol; j < jcol+w; j++) {
00220     printf("\tcol %d:\n", j);
00221     for (k = 0; k < nseg; k++)
00222         printf("\t\tseg %d, segrep %d, repfnz %d\n", k, 
00223             segrep[k], repfnz[(j-jcol)*n + segrep[k]]);
00224     }
00225 
00226 }
00227 
00228 
00229 void
00230 StatInit(SuperLUStat_t *stat)
00231 {
00232     register int i, w, panel_size, relax;
00233 
00234     panel_size = sp_ienv(1);
00235     relax = sp_ienv(2);
00236     w = SUPERLU_MAX(panel_size, relax);
00237     stat->panel_histo = intCalloc(w+1);
00238     stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double));
00239     if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime");
00240     stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
00241     if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops");
00242     for (i = 0; i < NPHASES; ++i) {
00243         stat->utime[i] = 0.;
00244         stat->ops[i] = 0.;
00245     }
00246 }
00247 
00248 
00249 void
00250 StatPrint(SuperLUStat_t *stat)
00251 {
00252     double         *utime;
00253     flops_t        *ops;
00254 
00255     utime = stat->utime;
00256     ops   = stat->ops;
00257     printf("Factor time  = %8.2f\n", utime[FACT]);
00258     if ( utime[FACT] != 0.0 )
00259       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
00260          ops[FACT]*1e-6/utime[FACT]);
00261 
00262     printf("Solve time   = %8.2f\n", utime[SOLVE]);
00263     if ( utime[SOLVE] != 0.0 )
00264       printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE],
00265          ops[SOLVE]*1e-6/utime[SOLVE]);
00266 
00267 }
00268 
00269 
00270 void
00271 StatFree(SuperLUStat_t *stat)
00272 {
00273     SUPERLU_FREE(stat->panel_histo);
00274     SUPERLU_FREE(stat->utime);
00275     SUPERLU_FREE(stat->ops);
00276 }
00277 
00278 
00279 flops_t
00280 LUFactFlops(SuperLUStat_t *stat)
00281 {
00282     return (stat->ops[FACT]);
00283 }
00284 
00285 flops_t
00286 LUSolveFlops(SuperLUStat_t *stat)
00287 {
00288     return (stat->ops[SOLVE]);
00289 }
00290 
00291 
00292 
00293 
00294 
00295 /* 
00296  * Fills an integer array with a given value.
00297  */
00298 void ifill(int *a, int alen, int ival)
00299 {
00300     register int i;
00301     for (i = 0; i < alen; i++) a[i] = ival;
00302 }
00303 
00304 
00305 
00306 /* 
00307  * Get the statistics of the supernodes 
00308  */
00309 #define NBUCKS 10
00310 static  int max_sup_size;
00311 
00312 void super_stats(int nsuper, int *xsup)
00313 {
00314     register int nsup1 = 0;
00315     int          i, isize, whichb, bl, bh;
00316     int          bucket[NBUCKS];
00317 
00318     max_sup_size = 0;
00319 
00320     for (i = 0; i <= nsuper; i++) {
00321     isize = xsup[i+1] - xsup[i];
00322     if ( isize == 1 ) nsup1++;
00323     if ( max_sup_size < isize ) max_sup_size = isize;   
00324     }
00325 
00326     printf("    Supernode statistics:\n\tno of super = %d\n", nsuper+1);
00327     printf("\tmax supernode size = %d\n", max_sup_size);
00328     printf("\tno of size 1 supernodes = %d\n", nsup1);
00329 
00330     /* Histogram of the supernode sizes */
00331     ifill (bucket, NBUCKS, 0);
00332 
00333     for (i = 0; i <= nsuper; i++) {
00334         isize = xsup[i+1] - xsup[i];
00335         whichb = (float) isize / max_sup_size * NBUCKS;
00336         if (whichb >= NBUCKS) whichb = NBUCKS - 1;
00337         bucket[whichb]++;
00338     }
00339     
00340     printf("\tHistogram of supernode sizes:\n");
00341     for (i = 0; i < NBUCKS; i++) {
00342         bl = (float) i * max_sup_size / NBUCKS;
00343         bh = (float) (i+1) * max_sup_size / NBUCKS;
00344         printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
00345     }
00346 
00347 }
00348 
00349 
00350 float SpaSize(int n, int np, float sum_npw)
00351 {
00352     return (sum_npw*8 + np*8 + n*4)/1024.;
00353 }
00354 
00355 float DenseSize(int n, float sum_nw)
00356 {
00357     return (sum_nw*8 + n*8)/1024.;;
00358 }
00359 
00360 
00361 
00362 /*
00363  * Check whether repfnz[] == EMPTY after reset.
00364  */
00365 void check_repfnz(int n, int w, int jcol, int *repfnz)
00366 {
00367     int jj, k;
00368 
00369     for (jj = jcol; jj < jcol+w; jj++) 
00370     for (k = 0; k < n; k++)
00371         if ( repfnz[(jj-jcol)*n + k] != EMPTY ) {
00372         fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj,
00373             k, repfnz[(jj-jcol)*n + k]);
00374         ABORT("check_repfnz");
00375         }
00376 }
00377 
00378 
00379 /* Print a summary of the testing results. */
00380 void
00381 PrintSumm(char *type, int nfail, int nrun, int nerrs)
00382 {
00383     if ( nfail > 0 )
00384     printf("%3s driver: %d out of %d tests failed to pass the threshold\n",
00385            type, nfail, nrun);
00386     else
00387     printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun);
00388 
00389     if ( nerrs > 0 )
00390     printf("%6d error messages recorded\n", nerrs);
00391 }
00392 
00393 
00394 int print_int_vec(char *what, int n, int *vec)
00395 {
00396     int i;
00397     printf("%s\n", what);
00398     for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]);
00399     return 0;
00400 }