Blender V2.61 - r43446

opennl.c

Go to the documentation of this file.
00001 
00004 /*
00005  *
00006  *  OpenNL: Numerical Library
00007  *  Copyright (C) 2004 Bruno Levy
00008  *
00009  *  This program is free software; you can redistribute it and/or modify
00010  *  it under the terms of the GNU General Public License as published by
00011  *  the Free Software Foundation; either version 2 of the License, or
00012  *  (at your option) any later version.
00013  *
00014  *  This program is distributed in the hope that it will be useful,
00015  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *  GNU General Public License for more details.
00018  *
00019  *  You should have received a copy of the GNU General Public License
00020  *  along with this program; if not, write to the Free Software
00021  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00022  *
00023  *  If you modify this software, you should include a notice giving the
00024  *  name of the person performing the modification, the date of modification,
00025  *  and the reason for such modification.
00026  *
00027  *  Contact: Bruno Levy
00028  *
00029  *   levy@loria.fr
00030  *
00031  *   ISA Project
00032  *   LORIA, INRIA Lorraine, 
00033  *   Campus Scientifique, BP 239
00034  *   54506 VANDOEUVRE LES NANCY CEDEX 
00035  *   FRANCE
00036  *
00037  *  Note that the GNU General Public License does not permit incorporating
00038  *  the Software into proprietary programs. 
00039  */
00040 
00041 #include "ONL_opennl.h"
00042 
00043 #include <stdlib.h>
00044 #include <stdio.h>
00045 #include <string.h>
00046 #include <math.h>
00047 
00048 #ifdef NL_PARANOID
00049 #ifndef NL_DEBUG
00050 #define NL_DEBUG
00051 #endif
00052 #endif
00053 
00054 /* SuperLU includes */
00055 #include <ssp_defs.h>
00056 #include <util.h>
00057 
00058 /************************************************************************************/
00059 /* Assertions */
00060 
00061 
00062 static void __nl_assertion_failed(char* cond, char* file, int line) {
00063     fprintf(
00064         stderr, 
00065         "OpenNL assertion failed: %s, file:%s, line:%d\n",
00066         cond,file,line
00067     );
00068     abort();
00069 }
00070 
00071 static void __nl_range_assertion_failed(
00072     float x, float min_val, float max_val, char* file, int line
00073 ) {
00074     fprintf(
00075         stderr, 
00076         "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n",
00077         x, min_val, max_val, file,line
00078     );
00079     abort();
00080 }
00081 
00082 static void __nl_should_not_have_reached(char* file, int line) {
00083     fprintf(
00084         stderr, 
00085         "OpenNL should not have reached this point: file:%s, line:%d\n",
00086         file,line
00087     );
00088     abort();
00089 }
00090 
00091 
00092 #define __nl_assert(x) {                                        \
00093     if(!(x)) {                                                \
00094         __nl_assertion_failed(#x,__FILE__, __LINE__);         \
00095     }                                                          \
00096 } 
00097 
00098 #define __nl_range_assert(x,min_val,max_val) {                \
00099     if(((x) < (min_val)) || ((x) > (max_val))) {                \
00100         __nl_range_assertion_failed(x, min_val, max_val,        \
00101             __FILE__, __LINE__                                \
00102         );                                                   \
00103     }                                                          \
00104 }
00105 
00106 #define __nl_assert_not_reached {                              \
00107     __nl_should_not_have_reached(__FILE__, __LINE__);         \
00108 }
00109 
00110 #ifdef NL_DEBUG
00111 #define __nl_debug_assert(x) __nl_assert(x)
00112 #define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
00113 #else
00114 #define __nl_debug_assert(x) 
00115 #define __nl_debug_range_assert(x,min_val,max_val) 
00116 #endif
00117 
00118 #ifdef NL_PARANOID
00119 #define __nl_parano_assert(x) __nl_assert(x)
00120 #define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
00121 #else
00122 #define __nl_parano_assert(x) 
00123 #define __nl_parano_range_assert(x,min_val,max_val) 
00124 #endif
00125 
00126 /************************************************************************************/
00127 /* classic macros */
00128 
00129 #ifndef MIN
00130 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) 
00131 #endif
00132 
00133 #ifndef MAX
00134 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) 
00135 #endif
00136 
00137 /************************************************************************************/
00138 /* memory management */
00139 
00140 #define __NL_NEW(T)             (T*)(calloc(1, sizeof(T))) 
00141 #define __NL_NEW_ARRAY(T,NB)       (T*)(calloc((NB),sizeof(T))) 
00142 #define __NL_RENEW_ARRAY(T,x,NB)   (T*)(realloc(x,(NB)*sizeof(T))) 
00143 #define __NL_DELETE(x)           free(x); x = NULL 
00144 #define __NL_DELETE_ARRAY(x)       free(x); x = NULL
00145 
00146 #define __NL_CLEAR(T, x)           memset(x, 0, sizeof(T)) 
00147 #define __NL_CLEAR_ARRAY(T,x,NB)   memset(x, 0, (NB)*sizeof(T)) 
00148 
00149 /************************************************************************************/
00150 /* Dynamic arrays for sparse row/columns */
00151 
00152 typedef struct {
00153     NLuint   index;
00154     NLfloat value;
00155 } __NLCoeff;
00156 
00157 typedef struct {
00158     NLuint size;
00159     NLuint capacity;
00160     __NLCoeff* coeff;
00161 } __NLRowColumn;
00162 
00163 static void __nlRowColumnConstruct(__NLRowColumn* c) {
00164     c->size  = 0;
00165     c->capacity = 0;
00166     c->coeff    = NULL;
00167 }
00168 
00169 static void __nlRowColumnDestroy(__NLRowColumn* c) {
00170     __NL_DELETE_ARRAY(c->coeff);
00171 #ifdef NL_PARANOID
00172     __NL_CLEAR(__NLRowColumn, c); 
00173 #endif
00174 }
00175 
00176 static void __nlRowColumnGrow(__NLRowColumn* c) {
00177     if(c->capacity != 0) {
00178         c->capacity = 2 * c->capacity;
00179         c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity);
00180     } else {
00181         c->capacity = 4;
00182         c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity);
00183     }
00184 }
00185 
00186 static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
00187     NLuint i;
00188     for(i=0; i<c->size; i++) {
00189         if(c->coeff[i].index == (NLuint)index) {
00190             c->coeff[i].value += value;
00191             return;
00192         }
00193     }
00194     if(c->size == c->capacity) {
00195         __nlRowColumnGrow(c);
00196     }
00197     c->coeff[c->size].index = index;
00198     c->coeff[c->size].value = value;
00199     c->size++;
00200 }
00201 
00202 /* Does not check whether the index already exists */
00203 static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
00204     if(c->size == c->capacity) {
00205         __nlRowColumnGrow(c);
00206     }
00207     c->coeff[c->size].index = index;
00208     c->coeff[c->size].value = value;
00209     c->size++;
00210 }
00211 
00212 static void __nlRowColumnClear(__NLRowColumn* c) {
00213     c->size  = 0;
00214     c->capacity = 0;
00215     __NL_DELETE_ARRAY(c->coeff);
00216 }
00217 
00218 /************************************************************************************/
00219 /* SparseMatrix data structure */
00220 
00221 #define __NL_ROWS     1
00222 #define __NL_COLUMNS   2
00223 #define __NL_SYMMETRIC 4
00224 
00225 typedef struct {
00226     NLuint m;
00227     NLuint n;
00228     NLuint diag_size;
00229     NLenum storage;
00230     __NLRowColumn* row;
00231     __NLRowColumn* column;
00232     NLfloat*      diag;
00233 } __NLSparseMatrix;
00234 
00235 
00236 static void __nlSparseMatrixConstruct(
00237     __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
00238 ) {
00239     NLuint i;
00240     M->m = m;
00241     M->n = n;
00242     M->storage = storage;
00243     if(storage & __NL_ROWS) {
00244         M->row = __NL_NEW_ARRAY(__NLRowColumn, m);
00245         for(i=0; i<m; i++) {
00246             __nlRowColumnConstruct(&(M->row[i]));
00247         }
00248     } else {
00249         M->row = NULL;
00250     }
00251 
00252     if(storage & __NL_COLUMNS) {
00253         M->column = __NL_NEW_ARRAY(__NLRowColumn, n);
00254         for(i=0; i<n; i++) {
00255             __nlRowColumnConstruct(&(M->column[i]));
00256         }
00257     } else {
00258         M->column = NULL;
00259     }
00260 
00261     M->diag_size = MIN(m,n);
00262     M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
00263 }
00264 
00265 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
00266     NLuint i;
00267     __NL_DELETE_ARRAY(M->diag);
00268     if(M->storage & __NL_ROWS) {
00269         for(i=0; i<M->m; i++) {
00270             __nlRowColumnDestroy(&(M->row[i]));
00271         }
00272         __NL_DELETE_ARRAY(M->row);
00273     }
00274     if(M->storage & __NL_COLUMNS) {
00275         for(i=0; i<M->n; i++) {
00276             __nlRowColumnDestroy(&(M->column[i]));
00277         }
00278         __NL_DELETE_ARRAY(M->column);
00279     }
00280 #ifdef NL_PARANOID
00281     __NL_CLEAR(__NLSparseMatrix,M);
00282 #endif
00283 }
00284 
00285 static void __nlSparseMatrixAdd(
00286     __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
00287 ) {
00288     __nl_parano_range_assert(i, 0, M->m - 1);
00289     __nl_parano_range_assert(j, 0, M->n - 1);
00290     if((M->storage & __NL_SYMMETRIC) && (j > i)) {
00291         return;
00292     }
00293     if(i == j) {
00294         M->diag[i] += value;
00295     }
00296     if(M->storage & __NL_ROWS) {
00297         __nlRowColumnAdd(&(M->row[i]), j, value);
00298     }
00299     if(M->storage & __NL_COLUMNS) {
00300         __nlRowColumnAdd(&(M->column[j]), i, value);
00301     }
00302 }
00303 
00304 static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
00305     NLuint i;
00306     if(M->storage & __NL_ROWS) {
00307         for(i=0; i<M->m; i++) {
00308             __nlRowColumnClear(&(M->row[i]));
00309         }
00310     }
00311     if(M->storage & __NL_COLUMNS) {
00312         for(i=0; i<M->n; i++) {
00313             __nlRowColumnClear(&(M->column[i]));
00314         }
00315     }
00316     __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);   
00317 }
00318 
00319 /* Returns the number of non-zero coefficients */
00320 static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
00321     NLuint nnz = 0;
00322     NLuint i;
00323     if(M->storage & __NL_ROWS) {
00324         for(i = 0; i<M->m; i++) {
00325             nnz += M->row[i].size;
00326         }
00327     } else if (M->storage & __NL_COLUMNS) {
00328         for(i = 0; i<M->n; i++) {
00329             nnz += M->column[i].size;
00330         }
00331     } else {
00332         __nl_assert_not_reached;
00333     }
00334     return nnz;
00335 }
00336 
00337 /************************************************************************************/
00338 /* SparseMatrix x Vector routines, internal helper routines */
00339 
00340 static void __nlSparseMatrix_mult_rows_symmetric(
00341     __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00342 ) {
00343     NLuint m = A->m;
00344     NLuint i,ij;
00345     __NLRowColumn* Ri = NULL;
00346     __NLCoeff* c = NULL;
00347     for(i=0; i<m; i++) {
00348         y[i] = 0;
00349         Ri = &(A->row[i]);
00350         for(ij=0; ij<Ri->size; ij++) {
00351             c = &(Ri->coeff[ij]);
00352             y[i] += c->value * x[c->index];
00353             if(i != c->index) {
00354                 y[c->index] += c->value * x[i];
00355             }
00356         }
00357     }
00358 }
00359 
00360 static void __nlSparseMatrix_mult_rows(
00361     __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00362 ) {
00363     NLuint m = A->m;
00364     NLuint i,ij;
00365     __NLRowColumn* Ri = NULL;
00366     __NLCoeff* c = NULL;
00367     for(i=0; i<m; i++) {
00368         y[i] = 0;
00369         Ri = &(A->row[i]);
00370         for(ij=0; ij<Ri->size; ij++) {
00371             c = &(Ri->coeff[ij]);
00372             y[i] += c->value * x[c->index];
00373         }
00374     }
00375 }
00376 
00377 static void __nlSparseMatrix_mult_cols_symmetric(
00378     __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00379 ) {
00380     NLuint n = A->n;
00381     NLuint j,ii;
00382     __NLRowColumn* Cj = NULL;
00383     __NLCoeff* c = NULL;
00384     for(j=0; j<n; j++) {
00385         y[j] = 0;
00386         Cj = &(A->column[j]);
00387         for(ii=0; ii<Cj->size; ii++) {
00388             c = &(Cj->coeff[ii]);
00389             y[c->index] += c->value * x[j];
00390             if(j != c->index) {
00391                 y[j] += c->value * x[c->index];
00392             }
00393         }
00394     }
00395 }
00396 
00397 static void __nlSparseMatrix_mult_cols(
00398     __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00399 ) {
00400     NLuint n = A->n;
00401     NLuint j,ii; 
00402     __NLRowColumn* Cj = NULL;
00403     __NLCoeff* c = NULL;
00404     __NL_CLEAR_ARRAY(NLfloat, y, A->m);
00405     for(j=0; j<n; j++) {
00406         Cj = &(A->column[j]);
00407         for(ii=0; ii<Cj->size; ii++) {
00408             c = &(Cj->coeff[ii]);
00409             y[c->index] += c->value * x[j];
00410         }
00411     }
00412 }
00413 
00414 /************************************************************************************/
00415 /* SparseMatrix x Vector routines, main driver routine */
00416 
00417 static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
00418     if(A->storage & __NL_ROWS) {
00419         if(A->storage & __NL_SYMMETRIC) {
00420             __nlSparseMatrix_mult_rows_symmetric(A, x, y);
00421         } else {
00422             __nlSparseMatrix_mult_rows(A, x, y);
00423         }
00424     } else {
00425         if(A->storage & __NL_SYMMETRIC) {
00426             __nlSparseMatrix_mult_cols_symmetric(A, x, y);
00427         } else {
00428             __nlSparseMatrix_mult_cols(A, x, y);
00429         }
00430     }
00431 }
00432 
00433 /* ****************** Routines for least squares ******************* */
00434 
00435 static void __nlSparseMatrix_square(
00436     __NLSparseMatrix* AtA, __NLSparseMatrix *A
00437 ) {
00438     NLuint m = A->m;
00439     NLuint n = A->n;
00440     NLuint i, j0, j1;
00441     __NLRowColumn *Ri = NULL;
00442     __NLCoeff *c0 = NULL, *c1 = NULL;
00443     float value;
00444 
00445     __nlSparseMatrixConstruct(AtA, n, n, A->storage);
00446 
00447     for(i=0; i<m; i++) {
00448         Ri = &(A->row[i]);
00449 
00450         for(j0=0; j0<Ri->size; j0++) {
00451             c0 = &(Ri->coeff[j0]);
00452             for(j1=0; j1<Ri->size; j1++) {
00453                 c1 = &(Ri->coeff[j1]);
00454 
00455                 value = c0->value*c1->value;
00456                 __nlSparseMatrixAdd(AtA, c0->index, c1->index, value);
00457             }
00458         }
00459     }
00460 }
00461 
00462 static void __nlSparseMatrix_transpose_mult_rows(
00463     __NLSparseMatrix* A, NLfloat* x, NLfloat* y
00464 ) {
00465     NLuint m = A->m;
00466     NLuint n = A->n;
00467     NLuint i,ij;
00468     __NLRowColumn* Ri = NULL;
00469     __NLCoeff* c = NULL;
00470 
00471     __NL_CLEAR_ARRAY(NLfloat, y, n);
00472 
00473     for(i=0; i<m; i++) {
00474         Ri = &(A->row[i]);
00475         for(ij=0; ij<Ri->size; ij++) {
00476             c = &(Ri->coeff[ij]);
00477             y[c->index] += c->value * x[i];
00478         }
00479     }
00480 }
00481 
00482 /************************************************************************************/
00483 /* NLContext data structure */
00484 
00485 typedef void(*__NLMatrixFunc)(float* x, float* y);
00486 
00487 typedef struct {
00488     NLfloat  value[4];
00489     NLboolean locked;
00490     NLuint  index;
00491     __NLRowColumn *a;
00492 } __NLVariable;
00493 
00494 #define __NL_STATE_INITIAL              0
00495 #define __NL_STATE_SYSTEM               1
00496 #define __NL_STATE_MATRIX               2
00497 #define __NL_STATE_MATRIX_CONSTRUCTED   3
00498 #define __NL_STATE_SYSTEM_CONSTRUCTED   4
00499 #define __NL_STATE_SYSTEM_SOLVED        5
00500 
00501 typedef struct {
00502     NLenum          state;
00503     NLuint          n;
00504     NLuint          m;
00505     __NLVariable*   variable;
00506     NLfloat*        b;
00507     NLfloat*        Mtb;
00508     __NLSparseMatrix M;
00509     __NLSparseMatrix MtM;
00510     NLfloat*        x;
00511     NLuint          nb_variables;
00512     NLuint          nb_rows;
00513     NLboolean       least_squares;
00514     NLboolean       symmetric;
00515     NLuint          nb_rhs;
00516     NLboolean       solve_again;
00517     NLboolean       alloc_M;
00518     NLboolean       alloc_MtM;
00519     NLboolean       alloc_variable;
00520     NLboolean       alloc_x;
00521     NLboolean       alloc_b;
00522     NLboolean       alloc_Mtb;
00523     NLfloat         error;
00524     __NLMatrixFunc  matrix_vector_prod;
00525 
00526     struct __NLSuperLUContext {
00527         NLboolean alloc_slu;
00528         SuperMatrix L, U;
00529         NLint *perm_c, *perm_r;
00530         SuperLUStat_t stat;
00531     } slu;
00532 } __NLContext;
00533 
00534 static __NLContext* __nlCurrentContext = NULL;
00535 
00536 static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
00537     __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
00538 }
00539 
00540 
00541 NLContext nlNewContext(void) {
00542     __NLContext* result   = __NL_NEW(__NLContext);
00543     result->state           = __NL_STATE_INITIAL;
00544     result->matrix_vector_prod = __nlMatrixVectorProd_default;
00545     result->nb_rhs = 1;
00546     nlMakeCurrent(result);
00547     return result;
00548 }
00549 
00550 static void __nlFree_SUPERLU(__NLContext *context);
00551 
00552 void nlDeleteContext(NLContext context_in) {
00553     __NLContext* context = (__NLContext*)(context_in);
00554     int i;
00555 
00556     if(__nlCurrentContext == context) {
00557         __nlCurrentContext = NULL;
00558     }
00559     if(context->alloc_M) {
00560         __nlSparseMatrixDestroy(&context->M);
00561     }
00562     if(context->alloc_MtM) {
00563         __nlSparseMatrixDestroy(&context->MtM);
00564     }
00565     if(context->alloc_variable) {
00566         for(i=0; i<context->nb_variables; i++) {
00567             if(context->variable[i].a) {
00568                 __nlRowColumnDestroy(context->variable[i].a);
00569                 __NL_DELETE(context->variable[i].a);
00570             }
00571         }
00572     }
00573     if(context->alloc_b) {
00574         __NL_DELETE_ARRAY(context->b);
00575     }
00576     if(context->alloc_Mtb) {
00577         __NL_DELETE_ARRAY(context->Mtb);
00578     }
00579     if(context->alloc_x) {
00580         __NL_DELETE_ARRAY(context->x);
00581     }
00582     if (context->slu.alloc_slu) {
00583         __nlFree_SUPERLU(context);
00584     }
00585 
00586 #ifdef NL_PARANOID
00587     __NL_CLEAR(__NLContext, context);
00588 #endif
00589     __NL_DELETE(context);
00590 }
00591 
00592 void nlMakeCurrent(NLContext context) {
00593     __nlCurrentContext = (__NLContext*)(context);
00594 }
00595 
00596 NLContext nlGetCurrent(void) {
00597     return __nlCurrentContext;
00598 }
00599 
00600 static void __nlCheckState(NLenum state) {
00601     __nl_assert(__nlCurrentContext->state == state);
00602 }
00603 
00604 static void __nlTransition(NLenum from_state, NLenum to_state) {
00605     __nlCheckState(from_state);
00606     __nlCurrentContext->state = to_state;
00607 }
00608 
00609 /************************************************************************************/
00610 /* Get/Set parameters */
00611 
00612 void nlSolverParameterf(NLenum pname, NLfloat param) {
00613     __nlCheckState(__NL_STATE_INITIAL);
00614     switch(pname) {
00615     case NL_NB_VARIABLES: {
00616         __nl_assert(param > 0);
00617         __nlCurrentContext->nb_variables = (NLuint)param;
00618     } break;
00619     case NL_NB_ROWS: {
00620         __nl_assert(param > 0);
00621         __nlCurrentContext->nb_rows = (NLuint)param;
00622     } break;
00623     case NL_LEAST_SQUARES: {
00624         __nlCurrentContext->least_squares = (NLboolean)param;
00625     } break;
00626     case NL_SYMMETRIC: {
00627         __nlCurrentContext->symmetric = (NLboolean)param;       
00628     } break;
00629     case NL_NB_RIGHT_HAND_SIDES: {
00630         __nlCurrentContext->nb_rhs = (NLuint)param;
00631     } break;
00632     default: {
00633         __nl_assert_not_reached;
00634     } break;
00635     }
00636 }
00637 
00638 void nlSolverParameteri(NLenum pname, NLint param) {
00639     __nlCheckState(__NL_STATE_INITIAL);
00640     switch(pname) {
00641     case NL_NB_VARIABLES: {
00642         __nl_assert(param > 0);
00643         __nlCurrentContext->nb_variables = (NLuint)param;
00644     } break;
00645     case NL_NB_ROWS: {
00646         __nl_assert(param > 0);
00647         __nlCurrentContext->nb_rows = (NLuint)param;
00648     } break;
00649     case NL_LEAST_SQUARES: {
00650         __nlCurrentContext->least_squares = (NLboolean)param;
00651     } break;
00652     case NL_SYMMETRIC: {
00653         __nlCurrentContext->symmetric = (NLboolean)param;       
00654     } break;
00655     case NL_NB_RIGHT_HAND_SIDES: {
00656         __nlCurrentContext->nb_rhs = (NLuint)param;
00657     } break;
00658     default: {
00659         __nl_assert_not_reached;
00660     } break;
00661     }
00662 }
00663 
00664 void nlGetBooleanv(NLenum pname, NLboolean* params) {
00665     switch(pname) {
00666     case NL_LEAST_SQUARES: {
00667         *params = __nlCurrentContext->least_squares;
00668     } break;
00669     case NL_SYMMETRIC: {
00670         *params = __nlCurrentContext->symmetric;
00671     } break;
00672     default: {
00673         __nl_assert_not_reached;
00674     } break;
00675     }
00676 }
00677 
00678 void nlGetFloatv(NLenum pname, NLfloat* params) {
00679     switch(pname) {
00680     case NL_NB_VARIABLES: {
00681         *params = (NLfloat)(__nlCurrentContext->nb_variables);
00682     } break;
00683     case NL_NB_ROWS: {
00684         *params = (NLfloat)(__nlCurrentContext->nb_rows);
00685     } break;
00686     case NL_LEAST_SQUARES: {
00687         *params = (NLfloat)(__nlCurrentContext->least_squares);
00688     } break;
00689     case NL_SYMMETRIC: {
00690         *params = (NLfloat)(__nlCurrentContext->symmetric);
00691     } break;
00692     case NL_ERROR: {
00693         *params = (NLfloat)(__nlCurrentContext->error);
00694     } break;
00695     default: {
00696         __nl_assert_not_reached;
00697     } break;
00698     }
00699 }
00700 
00701 void nlGetIntergerv(NLenum pname, NLint* params) {
00702     switch(pname) {
00703     case NL_NB_VARIABLES: {
00704         *params = (NLint)(__nlCurrentContext->nb_variables);
00705     } break;
00706     case NL_NB_ROWS: {
00707         *params = (NLint)(__nlCurrentContext->nb_rows);
00708     } break;
00709     case NL_LEAST_SQUARES: {
00710         *params = (NLint)(__nlCurrentContext->least_squares);
00711     } break;
00712     case NL_SYMMETRIC: {
00713         *params = (NLint)(__nlCurrentContext->symmetric);
00714     } break;
00715     default: {
00716         __nl_assert_not_reached;
00717     } break;
00718     }
00719 }
00720 
00721 /************************************************************************************/
00722 /* Enable / Disable */
00723 
00724 void nlEnable(NLenum pname) {
00725     switch(pname) {
00726     default: {
00727         __nl_assert_not_reached;
00728     }
00729     }
00730 }
00731 
00732 void nlDisable(NLenum pname) {
00733     switch(pname) {
00734     default: {
00735         __nl_assert_not_reached;
00736     }
00737     }
00738 }
00739 
00740 NLboolean nlIsEnabled(NLenum pname) {
00741     switch(pname) {
00742     default: {
00743         __nl_assert_not_reached;
00744     }
00745     }
00746     return NL_FALSE;
00747 }
00748 
00749 /************************************************************************************/
00750 /* Get/Set Lock/Unlock variables */
00751 
00752 void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) {
00753     __nlCheckState(__NL_STATE_SYSTEM);
00754     __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00755     __nlCurrentContext->variable[index].value[rhsindex] = value;    
00756 }
00757 
00758 NLfloat nlGetVariable(NLuint rhsindex, NLuint index) {
00759     __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
00760     __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00761     return __nlCurrentContext->variable[index].value[rhsindex];
00762 }
00763 
00764 void nlLockVariable(NLuint index) {
00765     __nlCheckState(__NL_STATE_SYSTEM);
00766     __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00767     __nlCurrentContext->variable[index].locked = NL_TRUE;
00768 }
00769 
00770 void nlUnlockVariable(NLuint index) {
00771     __nlCheckState(__NL_STATE_SYSTEM);
00772     __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00773     __nlCurrentContext->variable[index].locked = NL_FALSE;
00774 }
00775 
00776 NLboolean nlVariableIsLocked(NLuint index) {
00777     __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
00778     __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
00779     return __nlCurrentContext->variable[index].locked;
00780 }
00781 
00782 /************************************************************************************/
00783 /* System construction */
00784 
00785 static void __nlVariablesToVector() {
00786     __NLContext *context = __nlCurrentContext;
00787     NLuint i, j, nb_rhs;
00788 
00789     __nl_assert(context->alloc_x);
00790     __nl_assert(context->alloc_variable);
00791 
00792     nb_rhs= context->nb_rhs;
00793 
00794     for(i=0; i<context->nb_variables; i++) {
00795         __NLVariable* v = &(context->variable[i]);
00796         if(!v->locked) {
00797             __nl_assert(v->index < context->n);
00798 
00799             for(j=0; j<nb_rhs; j++)
00800                 context->x[context->n*j + v->index] = v->value[j];
00801         }
00802     }
00803 }
00804 
00805 static void __nlVectorToVariables() {
00806     __NLContext *context = __nlCurrentContext;
00807     NLuint i, j, nb_rhs;
00808 
00809     __nl_assert(context->alloc_x);
00810     __nl_assert(context->alloc_variable);
00811 
00812     nb_rhs= context->nb_rhs;
00813 
00814     for(i=0; i<context->nb_variables; i++) {
00815         __NLVariable* v = &(context->variable[i]);
00816         if(!v->locked) {
00817             __nl_assert(v->index < context->n);
00818 
00819             for(j=0; j<nb_rhs; j++)
00820                 v->value[j] = context->x[context->n*j + v->index];
00821         }
00822     }
00823 }
00824 
00825 static void __nlBeginSystem() {
00826     __nl_assert(__nlCurrentContext->nb_variables > 0);
00827 
00828     if (__nlCurrentContext->solve_again)
00829         __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM);
00830     else {
00831         __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM);
00832 
00833         __nlCurrentContext->variable = __NL_NEW_ARRAY(
00834             __NLVariable, __nlCurrentContext->nb_variables);
00835         
00836         __nlCurrentContext->alloc_variable = NL_TRUE;
00837     }
00838 }
00839 
00840 static void __nlEndSystem() {
00841     __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED);   
00842 }
00843 
00844 static void __nlBeginMatrix() {
00845     NLuint i;
00846     NLuint m = 0, n = 0;
00847     NLenum storage = __NL_ROWS;
00848     __NLContext *context = __nlCurrentContext;
00849 
00850     __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX);
00851 
00852     if (!context->solve_again) {
00853         for(i=0; i<context->nb_variables; i++) {
00854             if(context->variable[i].locked) {
00855                 context->variable[i].index = ~0;
00856                 context->variable[i].a = __NL_NEW(__NLRowColumn);
00857                 __nlRowColumnConstruct(context->variable[i].a);
00858             }
00859             else
00860                 context->variable[i].index = n++;
00861         }
00862 
00863         m = (context->nb_rows == 0)? n: context->nb_rows;
00864 
00865         context->m = m;
00866         context->n = n;
00867 
00868         __nlSparseMatrixConstruct(&context->M, m, n, storage);
00869         context->alloc_M = NL_TRUE;
00870 
00871         context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs);
00872         context->alloc_b = NL_TRUE;
00873 
00874         context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs);
00875         context->alloc_x = NL_TRUE;
00876     }
00877     else {
00878         /* need to recompute b only, A is not constructed anymore */
00879         __NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs);
00880     }
00881 
00882     __nlVariablesToVector();
00883 }
00884 
00885 static void __nlEndMatrixRHS(NLuint rhs) {
00886     __NLContext *context = __nlCurrentContext;
00887     __NLVariable *variable;
00888     __NLRowColumn *a;
00889     NLfloat *b, *Mtb;
00890     NLuint i, j;
00891 
00892     b = context->b + context->m*rhs;
00893     Mtb = context->Mtb + context->n*rhs;
00894 
00895     for(i=0; i<__nlCurrentContext->nb_variables; i++) {
00896         variable = &(context->variable[i]);
00897 
00898         if(variable->locked) {
00899             a = variable->a;
00900 
00901             for(j=0; j<a->size; j++) {
00902                 b[a->coeff[j].index] -= a->coeff[j].value*variable->value[rhs];
00903             }
00904         }
00905     }
00906 
00907     if(context->least_squares)
00908         __nlSparseMatrix_transpose_mult_rows(&context->M, b, Mtb);
00909 }
00910 
00911 static void __nlEndMatrix() {
00912     __NLContext *context = __nlCurrentContext;
00913     NLuint i;
00914 
00915     __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED);   
00916     
00917     if(context->least_squares) {
00918         if(!__nlCurrentContext->solve_again) {
00919             __nlSparseMatrix_square(&context->MtM, &context->M);
00920             context->alloc_MtM = NL_TRUE;
00921 
00922             context->Mtb =
00923                 __NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs);
00924             context->alloc_Mtb = NL_TRUE;
00925         }
00926     }
00927 
00928     for(i=0; i<context->nb_rhs; i++)
00929         __nlEndMatrixRHS(i);
00930 }
00931 
00932 void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
00933 {
00934     __NLContext *context = __nlCurrentContext;
00935 
00936     __nlCheckState(__NL_STATE_MATRIX);
00937 
00938     if(context->solve_again)
00939         return;
00940 
00941     if (!context->least_squares && context->variable[row].locked);
00942     else if (context->variable[col].locked) {
00943         if(!context->least_squares)
00944             row = context->variable[row].index;
00945         __nlRowColumnAppend(context->variable[col].a, row, value);
00946     }
00947     else {
00948         __NLSparseMatrix* M  = &context->M;
00949         
00950         if(!context->least_squares)
00951             row = context->variable[row].index;
00952         col = context->variable[col].index;
00953         
00954         __nl_range_assert(row, 0, context->m - 1);
00955         __nl_range_assert(col, 0, context->n - 1);
00956 
00957         __nlSparseMatrixAdd(M, row, col, value);
00958     }
00959 }
00960 
00961 void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
00962 {
00963     __NLContext *context = __nlCurrentContext;
00964     NLfloat* b = context->b;
00965 
00966     __nlCheckState(__NL_STATE_MATRIX);
00967 
00968     if(context->least_squares) {
00969         __nl_range_assert(index, 0, context->m - 1);
00970         b[rhsindex*context->m + index] += value;
00971     }
00972     else {
00973         if(!context->variable[index].locked) {
00974             index = context->variable[index].index;
00975             __nl_range_assert(index, 0, context->m - 1);
00976 
00977             b[rhsindex*context->m + index] += value;
00978         }
00979     }
00980 }
00981 
00982 void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value)
00983 {
00984     __NLContext *context = __nlCurrentContext;
00985     NLfloat* b = context->b;
00986 
00987     __nlCheckState(__NL_STATE_MATRIX);
00988 
00989     if(context->least_squares) {
00990         __nl_range_assert(index, 0, context->m - 1);
00991         b[rhsindex*context->m + index] = value;
00992     }
00993     else {
00994         if(!context->variable[index].locked) {
00995             index = context->variable[index].index;
00996             __nl_range_assert(index, 0, context->m - 1);
00997 
00998             b[rhsindex*context->m + index] = value;
00999         }
01000     }
01001 }
01002 
01003 void nlBegin(NLenum prim) {
01004     switch(prim) {
01005     case NL_SYSTEM: {
01006         __nlBeginSystem();
01007     } break;
01008     case NL_MATRIX: {
01009         __nlBeginMatrix();
01010     } break;
01011     default: {
01012         __nl_assert_not_reached;
01013     }
01014     }
01015 }
01016 
01017 void nlEnd(NLenum prim) {
01018     switch(prim) {
01019     case NL_SYSTEM: {
01020         __nlEndSystem();
01021     } break;
01022     case NL_MATRIX: {
01023         __nlEndMatrix();
01024     } break;
01025     default: {
01026         __nl_assert_not_reached;
01027     }
01028     }
01029 }
01030 
01031 /************************************************************************/
01032 /* SuperLU wrapper */
01033 
01034 /* Note: SuperLU is difficult to call, but it is worth it.  */
01035 /* Here is a driver inspired by A. Sheffer's "cow flattener". */
01036 static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) {
01037 
01038     /* OpenNL Context */
01039     __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M;
01040     NLuint n = context->n;
01041     NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */
01042 
01043     /* Compressed Row Storage matrix representation */
01044     NLint   *xa     = __NL_NEW_ARRAY(NLint, n+1);
01045     NLfloat *rhs    = __NL_NEW_ARRAY(NLfloat, n);
01046     NLfloat *a      = __NL_NEW_ARRAY(NLfloat, nnz);
01047     NLint   *asub   = __NL_NEW_ARRAY(NLint, nnz);
01048     NLint   *etree  = __NL_NEW_ARRAY(NLint, n);
01049 
01050     /* SuperLU variables */
01051     SuperMatrix At, AtP;
01052     NLint info, panel_size, relax;
01053     superlu_options_t options;
01054 
01055     /* Temporary variables */
01056     NLuint i, jj, count;
01057     
01058     __nl_assert(!(M->storage & __NL_SYMMETRIC));
01059     __nl_assert(M->storage & __NL_ROWS);
01060     __nl_assert(M->m == M->n);
01061     
01062     /* Convert M to compressed column format */
01063     for(i=0, count=0; i<n; i++) {
01064         __NLRowColumn *Ri = M->row + i;
01065         xa[i] = count;
01066 
01067         for(jj=0; jj<Ri->size; jj++, count++) {
01068             a[count] = Ri->coeff[jj].value;
01069             asub[count] = Ri->coeff[jj].index;
01070         }
01071     }
01072     xa[n] = nnz;
01073 
01074     /* Free M, don't need it anymore at this point */
01075     __nlSparseMatrixClear(M);
01076 
01077     /* Create superlu A matrix transposed */
01078     sCreate_CompCol_Matrix(
01079         &At, n, n, nnz, a, asub, xa, 
01080         SLU_NC,     /* Colum wise, no supernode */
01081         SLU_S,      /* floats */ 
01082         SLU_GE      /* general storage */
01083     );
01084 
01085     /* Set superlu options */
01086     set_default_options(&options);
01087     options.ColPerm = MY_PERMC;
01088     options.Fact = DOFACT;
01089 
01090     StatInit(&(context->slu.stat));
01091 
01092     panel_size = sp_ienv(1); /* sp_ienv give us the defaults */
01093     relax = sp_ienv(2);
01094 
01095     /* Compute permutation and permuted matrix */
01096     context->slu.perm_r = __NL_NEW_ARRAY(NLint, n);
01097     context->slu.perm_c = __NL_NEW_ARRAY(NLint, n);
01098 
01099     if ((permutation == NULL) || (*permutation == -1)) {
01100         get_perm_c(3, &At, context->slu.perm_c);
01101 
01102         if (permutation)
01103             memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n);
01104     }
01105     else
01106         memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n);
01107 
01108     sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP);
01109 
01110     /* Decompose into L and U */
01111     sgstrf(&options, &AtP, relax, panel_size,
01112         etree, NULL, 0, context->slu.perm_c, context->slu.perm_r,
01113         &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info);
01114 
01115     /* Cleanup */
01116 
01117     Destroy_SuperMatrix_Store(&At);
01118     Destroy_CompCol_Permuted(&AtP);
01119 
01120     __NL_DELETE_ARRAY(etree);
01121     __NL_DELETE_ARRAY(xa);
01122     __NL_DELETE_ARRAY(rhs);
01123     __NL_DELETE_ARRAY(a);
01124     __NL_DELETE_ARRAY(asub);
01125 
01126     context->slu.alloc_slu = NL_TRUE;
01127 
01128     return (info == 0);
01129 }
01130 
01131 static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
01132 
01133     /* OpenNL Context */
01134     NLfloat* b = (context->least_squares)? context->Mtb: context->b;
01135     NLfloat* x = context->x;
01136     NLuint n = context->n, j;
01137 
01138     /* SuperLU variables */
01139     SuperMatrix B;
01140     NLint info = 0;
01141 
01142     for(j=0; j<context->nb_rhs; j++, b+=n, x+=n) {
01143         /* Create superlu array for B */
01144         sCreate_Dense_Matrix(
01145             &B, n, 1, b, n, 
01146             SLU_DN, /* Fortran-type column-wise storage */
01147             SLU_S,  /* floats                         */
01148             SLU_GE  /* general                        */
01149         );
01150 
01151         /* Forward/Back substitution to compute x */
01152         sgstrs(TRANS, &(context->slu.L), &(context->slu.U),
01153             context->slu.perm_c, context->slu.perm_r, &B,
01154             &(context->slu.stat), &info);
01155 
01156         if(info == 0)
01157             memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n);
01158 
01159         Destroy_SuperMatrix_Store(&B);
01160     }
01161 
01162     return (info == 0);
01163 }
01164 
01165 static void __nlFree_SUPERLU(__NLContext *context) {
01166 
01167     Destroy_SuperNode_Matrix(&(context->slu.L));
01168     Destroy_CompCol_Matrix(&(context->slu.U));
01169 
01170     StatFree(&(context->slu.stat));
01171 
01172     __NL_DELETE_ARRAY(context->slu.perm_r);
01173     __NL_DELETE_ARRAY(context->slu.perm_c);
01174 
01175     context->slu.alloc_slu = NL_FALSE;
01176 }
01177 
01178 void nlPrintMatrix(void) {
01179     __NLContext *context = __nlCurrentContext;
01180     __NLSparseMatrix* M  = &(context->M);
01181     __NLSparseMatrix* MtM  = &(context->MtM);
01182     float *b = context->b;
01183     NLuint i, jj, k;
01184     NLuint m = context->m;
01185     NLuint n = context->n;
01186     __NLRowColumn* Ri = NULL;
01187     float *value = malloc(sizeof(*value)*(n+m));
01188 
01189     printf("A:\n");
01190     for(i=0; i<m; i++) {
01191         Ri = &(M->row[i]);
01192 
01193         memset(value, 0.0, sizeof(*value)*n);
01194         for(jj=0; jj<Ri->size; jj++)
01195             value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
01196 
01197         for (k = 0; k<n; k++)
01198             printf("%.3f ", value[k]);
01199         printf("\n");
01200     }
01201 
01202     for(k=0; k<context->nb_rhs; k++) {
01203         printf("b (%d):\n", k);
01204         for(i=0; i<n; i++)
01205             printf("%f ", b[context->n*k + i]);
01206         printf("\n");
01207     }
01208 
01209     if(context->alloc_MtM) {
01210         printf("AtA:\n");
01211         for(i=0; i<n; i++) {
01212             Ri = &(MtM->row[i]);
01213 
01214             memset(value, 0.0, sizeof(*value)*m);
01215             for(jj=0; jj<Ri->size; jj++)
01216                 value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
01217 
01218             for (k = 0; k<n; k++)
01219                 printf("%.3f ", value[k]);
01220             printf("\n");
01221         }
01222 
01223         for(k=0; k<context->nb_rhs; k++) {
01224             printf("Mtb (%d):\n", k);
01225             for(i=0; i<n; i++)
01226                 printf("%f ", context->Mtb[context->n*k + i]);
01227             printf("\n");
01228         }
01229         printf("\n");
01230     }
01231 
01232     free(value);
01233 }
01234 
01235 /************************************************************************/
01236 /* nlSolve() driver routine */
01237 
01238 NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) {
01239     NLboolean result = NL_TRUE;
01240 
01241     __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED);
01242 
01243     if (!__nlCurrentContext->solve_again)
01244         result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation);
01245 
01246     if (result) {
01247         result = __nlInvert_SUPERLU(__nlCurrentContext);
01248 
01249         if (result) {
01250             __nlVectorToVariables();
01251 
01252             if (solveAgain)
01253                 __nlCurrentContext->solve_again = NL_TRUE;
01254 
01255             __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SYSTEM_SOLVED);
01256         }
01257     }
01258 
01259     return result;
01260 }
01261 
01262 NLboolean nlSolve() {
01263     return nlSolveAdvanced(NULL, NL_FALSE);
01264 }
01265