las2.c

Go to the documentation of this file.
00001 /*************************************************************************
00002                            (c) Copyright 2003
00003                               Douglas Rohde
00004 
00005                      adapted from SVDPACKC, which is
00006 
00007                            (c) Copyright 1993
00008                         University of Tennessee
00009                           All Rights Reserved                          
00010  *************************************************************************/
00011 #include <stdio.h>
00012 #include <stdlib.h>
00013 #include <string.h>
00014 #include <errno.h>
00015 #include <math.h>
00016 #include <fcntl.h>
00017 #include "svdlib.h"
00018 #include "svdutil.h"
00019 
00020 #define MAXLL 2
00021 
00022 #define LMTNW   100000000 /* max. size of working area allowed  */
00023 
00024 enum storeVals {STORQ = 1, RETRQ, STORP, RETRP};
00025 
00026 static char *error_msg[] = {  /* error messages used by function    *
00027                                * check_parameters                   */
00028   NULL,
00029   "",
00030   "ENDL MUST BE LESS THAN ENDR",
00031   "REQUESTED DIMENSIONS CANNOT EXCEED NUM ITERATIONS",
00032   "ONE OF YOUR DIMENSIONS IS LESS THAN OR EQUAL TO ZERO",
00033   "NUM ITERATIONS (NUMBER OF LANCZOS STEPS) IS INVALID",
00034   "REQUESTED DIMENSIONS (NUMBER OF EIGENPAIRS DESIRED) IS INVALID",
00035   "6*N+4*ITERATIONS+1 + ITERATIONS*ITERATIONS CANNOT EXCEED NW",
00036   "6*N+4*ITERATIONS+1 CANNOT EXCEED NW", NULL};
00037 
00038 double **LanStore, *OPBTemp;
00039 double eps, eps1, reps, eps34;
00040 long ierr;
00041 /*
00042 double rnm, anorm, tol;
00043 FILE *fp_out1, *fp_out2;
00044 */
00045 
00046 void   purge(long n, long ll, double *r, double *q, double *ra,  
00047              double *qa, double *wrk, double *eta, double *oldeta, long step, 
00048              double *rnmp, double tol);
00049 void   ortbnd(double *alf, double *eta, double *oldeta, double *bet, long step,
00050               double rnm);
00051 double startv(SMat A, double *wptr[], long step, long n);
00052 void   store(long, long, long, double *);
00053 void   imtql2(long, long, double *, double *, double *);
00054 void   imtqlb(long n, double d[], double e[], double bnd[]);
00055 void   write_header(long, long, double, double, long, double, long, long, 
00056                     long);
00057 long   check_parameters(SMat A, long dimensions, long iterations, 
00058                         double endl, double endr, long vectors);
00059 int    lanso(SMat A, long iterations, long dimensions, double endl,
00060              double endr, double *ritz, double *bnd, double *wptr[], 
00061              long *neigp, long n);
00062 long   ritvec(long n, SMat A, SVDRec R, double kappa, double *ritz, 
00063               double *bnd, double *alf, double *bet, double *w2, 
00064               long steps, long neig);
00065 long   lanczos_step(SMat A, long first, long last, double *wptr[],
00066                     double *alf, double *eta, double *oldeta,
00067                     double *bet, long *ll, long *enough, double *rnmp, 
00068                     double *tolp, long n);
00069 void   stpone(SMat A, double *wrkptr[], double *rnmp, double *tolp, long n);
00070 long   error_bound(long *, double, double, double *, double *, long step, 
00071                    double tol);
00072 void   machar(long *ibeta, long *it, long *irnd, long *machep, long *negep);
00073 
00074 /***********************************************************************
00075  *                                                                     *
00076  *                        main()                                       *
00077  * Sparse SVD(A) via Eigensystem of A'A symmetric Matrix               *
00078  *                  (double precision)                                 *
00079  *                                                                     *
00080  ***********************************************************************/
00081 /***********************************************************************
00082 
00083    Description
00084    -----------
00085 
00086    This sample program uses landr to compute singular triplets of A via
00087    the equivalent symmetric eigenvalue problem                         
00088 
00089    B x = lambda x, where x' = (u',v'), lambda = sigma**2,
00090    where sigma is a singular value of A,
00091                                                                      
00092    B = A'A , and A is m (nrow) by n (ncol) (nrow >> ncol),                
00093                                                                  
00094    so that {u,sqrt(lambda),v} is a singular triplet of A.        
00095    (A' = transpose of A)                                      
00096                                                             
00097    User supplied routines: svd_opa, opb, store, timer              
00098                                                         
00099    svd_opa(     x,y) takes an n-vector x and returns A*x in y.
00100    svd_opb(ncol,x,y) takes an n-vector x and returns B*x in y.
00101                                                                   
00102    Based on operation flag isw, store(n,isw,j,s) stores/retrieves 
00103    to/from storage a vector of length n in s.                   
00104                                                                
00105    User should edit timer() with an appropriate call to an intrinsic
00106    timing routine that returns elapsed user time.                      
00107 
00108 
00109    External parameters 
00110    -------------------
00111 
00112    Defined and documented in las2.h
00113 
00114 
00115    Local parameters 
00116    ----------------
00117 
00118   (input)
00119    endl     left end of interval containing unwanted eigenvalues of B
00120    endr     right end of interval containing unwanted eigenvalues of B
00121    kappa    relative accuracy of ritz values acceptable as eigenvalues
00122               of B
00123               vectors is not equal to 1
00124    r        work array
00125    n        dimension of the eigenproblem for matrix B (ncol)
00126    dimensions   upper limit of desired number of singular triplets of A
00127    iterations   upper limit of desired number of Lanczos steps
00128    nnzero   number of nonzeros in A
00129    vectors  1 indicates both singular values and singular vectors are 
00130               wanted and they can be found in output file lav2;
00131               0 indicates only singular values are wanted 
00132                 
00133   (output)
00134    ritz     array of ritz values
00135    bnd      array of error bounds
00136    d        array of singular values
00137    memory   total memory allocated in bytes to solve the B-eigenproblem
00138 
00139 
00140    Functions used
00141    --------------
00142 
00143    BLAS         svd_daxpy, svd_dscal, svd_ddot
00144    USER         svd_opa, svd_opb, timer
00145    MISC         write_header, check_parameters
00146    LAS2         landr
00147 
00148 
00149    Precision
00150    ---------
00151 
00152    All floating-point calculations are done in double precision;
00153    variables are declared as long and double.
00154 
00155 
00156    LAS2 development
00157    ----------------
00158 
00159    LAS2 is a C translation of the Fortran-77 LAS2 from the SVDPACK
00160    library written by Michael W. Berry, University of Tennessee,
00161    Dept. of Computer Science, 107 Ayres Hall, Knoxville, TN, 37996-1301
00162 
00163    31 Jan 1992:  Date written 
00164 
00165    Theresa H. Do
00166    University of Tennessee
00167    Dept. of Computer Science
00168    107 Ayres Hall
00169    Knoxville, TN, 37996-1301
00170    internet: tdo@cs.utk.edu
00171 
00172  ***********************************************************************/
00173 
00174 /***********************************************************************
00175  *                                                                     *
00176  *                    check_parameters()                               *
00177  *                                                                     *
00178  ***********************************************************************/
00179 /***********************************************************************
00180 
00181    Description
00182    -----------
00183    Function validates input parameters and returns error code (long)  
00184 
00185    Parameters 
00186    ----------
00187   (input)
00188    dimensions   upper limit of desired number of eigenpairs of B           
00189    iterations   upper limit of desired number of lanczos steps             
00190    n        dimension of the eigenproblem for matrix B               
00191    endl     left end of interval containing unwanted eigenvalues of B
00192    endr     right end of interval containing unwanted eigenvalues of B
00193    vectors  1 indicates both eigenvalues and eigenvectors are wanted 
00194             and they can be found in lav2; 0 indicates eigenvalues only
00195    nnzero   number of nonzero elements in input matrix (matrix A)      
00196                                                                       
00197  ***********************************************************************/
00198 
00199 long check_parameters(SMat A, long dimensions, long iterations, 
00200                       double endl, double endr, long vectors) {
00201    long error_index;
00202    error_index = 0;
00203 
00204    if (endl >/*=*/ endr)  error_index = 2;
00205    else if (dimensions > iterations) error_index = 3;
00206    else if (A->cols <= 0 || A->rows <= 0) error_index = 4;
00207    /*else if (n > A->cols || n > A->rows) error_index = 1;*/
00208    else if (iterations <= 0 || iterations > A->cols || iterations > A->rows)
00209      error_index = 5;
00210    else if (dimensions <= 0 || dimensions > iterations) error_index = 6;
00211    if (error_index) 
00212      svd_error("svdLAS2 parameter error: %s\n", error_msg[error_index]);
00213    return(error_index);
00214 }
00215 
00216 /***********************************************************************
00217  *                                                                     *
00218  *                        write_header()                               *
00219  *   Function writes out header of output file containing ritz values  *
00220  *                                                                     *
00221  ***********************************************************************/
00222 
00223 void write_header(long iterations, long dimensions, double endl, double endr, 
00224                   long vectors, double kappa, long nrow, long ncol, 
00225                   long vals) {
00226   printf("SOLVING THE [A^TA] EIGENPROBLEM\n");
00227   printf("NO. OF ROWS               = %6ld\n", nrow);
00228   printf("NO. OF COLUMNS            = %6ld\n", ncol);
00229   printf("NO. OF NON-ZERO VALUES    = %6ld\n", vals);
00230   printf("MATRIX DENSITY            = %6.2f%%\n", 
00231          ((float) vals / nrow) * 100 / ncol);
00232   /* printf("ORDER OF MATRIX A         = %5ld\n", n); */
00233   printf("MAX. NO. OF LANCZOS STEPS = %6ld\n", iterations);
00234   printf("MAX. NO. OF EIGENPAIRS    = %6ld\n", dimensions);
00235   printf("LEFT  END OF THE INTERVAL = %9.2E\n", endl);
00236   printf("RIGHT END OF THE INTERVAL = %9.2E\n", endr);
00237   printf("KAPPA                     = %9.2E\n", kappa);
00238   /* printf("WANT S-VECTORS?   [T/F]   =     %c\n", (vectors) ? 'T' : 'F'); */
00239   printf("\n");
00240   return;
00241 }
00242 
00243 
00244 /***********************************************************************
00245  *                                                                     *
00246  *                              landr()                                *
00247  *        Lanczos algorithm with selective orthogonalization           *
00248  *                    Using Simon's Recurrence                         *
00249  *                       (double precision)                            *
00250  *                                                                     *
00251  ***********************************************************************/
00252 /***********************************************************************
00253 
00254    Description
00255    -----------
00256 
00257    landr() is the LAS2 driver routine that, upon entry,
00258      (1)  checks for the validity of input parameters of the 
00259           B-eigenproblem 
00260      (2)  determines several machine constants
00261      (3)  makes a Lanczos run
00262      (4)  calculates B-eigenvectors (singular vectors of A) if requested 
00263           by user
00264 
00265 
00266    arguments
00267    ---------
00268 
00269    (input)
00270    n        dimension of the eigenproblem for A'A
00271    iterations   upper limit of desired number of Lanczos steps
00272    dimensions   upper limit of desired number of eigenpairs
00273    nnzero   number of nonzeros in matrix A
00274    endl     left end of interval containing unwanted eigenvalues of B
00275    endr     right end of interval containing unwanted eigenvalues of B
00276    vectors  1 indicates both eigenvalues and eigenvectors are wanted
00277               and they can be found in output file lav2; 
00278             0 indicates only eigenvalues are wanted
00279    kappa    relative accuracy of ritz values acceptable as eigenvalues
00280               of B (singular values of A)
00281    r        work array
00282 
00283    (output)
00284    j        number of Lanczos steps actually taken                     
00285    neig     number of ritz values stabilized                           
00286    ritz     array to hold the ritz values                              
00287    bnd      array to hold the error bounds
00288 
00289 
00290    External parameters
00291    -------------------
00292 
00293    Defined and documented in las2.h
00294 
00295 
00296    local parameters
00297    -------------------
00298 
00299    ibeta    radix for the floating-point representation
00300    it       number of base ibeta digits in the floating-point significand
00301    irnd     floating-point addition rounded or chopped
00302    machep   machine relative precision or round-off error
00303    negeps   largest negative integer
00304    wptr     array of pointers each pointing to a work space
00305 
00306 
00307    Functions used
00308    --------------
00309 
00310    MISC         svd_dmax, machar, check_parameters
00311    LAS2         ritvec, lanso
00312 
00313  ***********************************************************************/
00314 
00315 SVDRec svdLAS2A(SMat A, long dimensions) {
00316   double end[2] = {-1.0e-30, 1.0e-30};
00317   double kappa = 1e-6;
00318   if (!A) {
00319     svd_error("svdLAS2A called with NULL array\n");
00320     return NULL;
00321   }
00322   return svdLAS2(A, dimensions, 0, end, kappa);
00323 }
00324 
00325 
00326 SVDRec svdLAS2(SMat A, long dimensions, long iterations, double end[2], 
00327                double kappa) {
00328   char transpose = FALSE;
00329   long ibeta, it, irnd, machep, negep, n, i, steps, nsig, neig, m;
00330   double *wptr[10], *ritz, *bnd;
00331   SVDRec R = NULL;
00332   
00333   svdResetCounters();
00334 
00335   m = svd_imin(A->rows, A->cols);
00336   if (dimensions <= 0 || dimensions > m)
00337     dimensions = m;
00338   if (iterations <= 0 || iterations > m)
00339     iterations = m;
00340   if (iterations < dimensions) iterations = dimensions;
00341 
00342   /* Write output header */
00343   if (SVDVerbosity > 0)
00344     write_header(iterations, dimensions, end[0], end[1], TRUE, kappa, A->rows, 
00345                  A->cols, A->vals);
00346 
00347   /* Check parameters */
00348   if (check_parameters(A, dimensions, iterations, end[0], end[1], TRUE))
00349     return NULL;
00350 
00351   /* If A is wide, the SVD is computed on its transpose for speed. */
00352   if (A->cols >= A->rows * 1.2) {
00353     if (SVDVerbosity > 0) printf("TRANSPOSING THE MATRIX FOR SPEED\n");
00354     transpose = TRUE;
00355     A = svdTransposeS(A);
00356   }
00357 
00358   n = A->cols;
00359   /* Compute machine precision */ 
00360   machar(&ibeta, &it, &irnd, &machep, &negep);
00361   eps1 = eps * sqrt((double) n);
00362   reps = sqrt(eps);
00363   eps34 = reps * sqrt(reps);
00364 
00365   /* Allocate temporary space. */
00366   if (!(wptr[0] = svd_doubleArray(n, TRUE, "las2: wptr[0]"))) goto abort;
00367   if (!(wptr[1] = svd_doubleArray(n, FALSE, "las2: wptr[1]"))) goto abort;
00368   if (!(wptr[2] = svd_doubleArray(n, FALSE, "las2: wptr[2]"))) goto abort;
00369   if (!(wptr[3] = svd_doubleArray(n, FALSE, "las2: wptr[3]"))) goto abort;
00370   if (!(wptr[4] = svd_doubleArray(n, FALSE, "las2: wptr[4]"))) goto abort;
00371   if (!(wptr[5] = svd_doubleArray(n, FALSE, "las2: wptr[5]"))) goto abort;
00372   if (!(wptr[6] = svd_doubleArray(iterations, FALSE, "las2: wptr[6]"))) 
00373     goto abort;
00374   if (!(wptr[7] = svd_doubleArray(iterations, FALSE, "las2: wptr[7]"))) 
00375     goto abort;
00376   if (!(wptr[8] = svd_doubleArray(iterations, FALSE, "las2: wptr[8]"))) 
00377     goto abort;
00378   if (!(wptr[9] = svd_doubleArray(iterations + 1, FALSE, "las2: wptr[9]"))) 
00379     goto abort;
00380   /* Calloc may be unnecessary: */
00381   if (!(ritz    = svd_doubleArray(iterations + 1, TRUE, "las2: ritz"))) 
00382     goto abort;  
00383   /* Calloc may be unnecessary: */
00384   if (!(bnd     = svd_doubleArray(iterations + 1, TRUE, "las2: bnd"))) 
00385     goto abort;
00386   memset(bnd, 127, (iterations + 1) * sizeof(double));
00387 
00388   if (!(LanStore = (double **) calloc(iterations + MAXLL, sizeof(double *))))
00389     goto abort;
00390   if (!(OPBTemp = svd_doubleArray(A->rows, FALSE, "las2: OPBTemp"))) 
00391     goto abort;
00392 
00393   /* Actually run the lanczos thing: */
00394   steps = lanso(A, iterations, dimensions, end[0], end[1], ritz, bnd, wptr, 
00395                 &neig, n);
00396 
00397   /* Print some stuff. */
00398   if (SVDVerbosity > 0) {
00399     printf("NUMBER OF LANCZOS STEPS   = %6ld\n"
00400            "RITZ VALUES STABILIZED    = %6ld\n", steps + 1, neig);
00401   }
00402   if (SVDVerbosity > 2) {
00403     printf("\nCOMPUTED RITZ VALUES  (ERROR BNDS)\n");
00404     for (i = 0; i <= steps; i++)
00405       printf("%3ld  %22.14E  (%11.2E)\n", i + 1, ritz[i], bnd[i]);
00406   }
00407 
00408   SAFE_FREE(wptr[0]);
00409   SAFE_FREE(wptr[1]);
00410   SAFE_FREE(wptr[2]);
00411   SAFE_FREE(wptr[3]);
00412   SAFE_FREE(wptr[4]);
00413   SAFE_FREE(wptr[7]);
00414   SAFE_FREE(wptr[8]);
00415 
00416   /* Compute eigenvectors */
00417   kappa = svd_dmax(fabs(kappa), eps34);
00418   
00419   R = svdNewSVDRec();
00420   if (!R) {
00421     svd_error("svdLAS2: allocation of R failed");
00422     goto cleanup;
00423   }
00424   R->d  = /*svd_imin(nsig, dimensions)*/dimensions;
00425   R->Ut = svdNewDMat(R->d, A->rows);
00426   R->S  = svd_doubleArray(R->d, TRUE, "las2: R->s");
00427   R->Vt = svdNewDMat(R->d, A->cols);
00428   if (!R->Ut || !R->S || !R->Vt) {
00429     svd_error("svdLAS2: allocation of R failed");
00430     goto cleanup;
00431   }
00432 
00433   nsig = ritvec(n, A, R, kappa, ritz, bnd, wptr[6], wptr[9], wptr[5], steps, 
00434                 neig);
00435   
00436   if (SVDVerbosity > 1) {
00437     printf("\nSINGULAR VALUES: ");
00438     svdWriteDenseArray(R->S, R->d, "-", FALSE);
00439 
00440     if (SVDVerbosity > 2) {
00441       printf("\nLEFT SINGULAR VECTORS (transpose of U): ");
00442       svdWriteDenseMatrix(R->Ut, "-", SVD_F_DT);
00443 
00444       printf("\nRIGHT SINGULAR VECTORS (transpose of V): ");
00445       svdWriteDenseMatrix(R->Vt, "-", SVD_F_DT);
00446     }
00447   } else if (SVDVerbosity > 0)
00448     printf("SINGULAR VALUES FOUND     = %6d\n", R->d);
00449 
00450  cleanup:    
00451   for (i = 0; i <= 9; i++)
00452     SAFE_FREE(wptr[i]);
00453   SAFE_FREE(ritz);
00454   SAFE_FREE(bnd);
00455   if (LanStore) {
00456     for (i = 0; i < iterations + MAXLL; i++)
00457       SAFE_FREE(LanStore[i]);
00458     SAFE_FREE(LanStore);
00459   }
00460   SAFE_FREE(OPBTemp);
00461 
00462   /* This swaps and transposes the singular matrices if A was transposed. */
00463   if (R && transpose) {
00464     DMat T;
00465     svdFreeSMat(A);
00466     T = R->Ut;
00467     R->Ut = R->Vt;
00468     R->Vt = T;
00469   }
00470 
00471   return R;
00472 abort:
00473   svd_error("svdLAS2: fatal error, aborting");
00474   return NULL;
00475 }
00476 
00477 
00478 /***********************************************************************
00479  *                                                                     *
00480  *                        ritvec()                                     *
00481  *          Function computes the singular vectors of matrix A         *
00482  *                                                                     *
00483  ***********************************************************************/
00484 /***********************************************************************
00485 
00486    Description
00487    -----------
00488 
00489    This function is invoked by landr() only if eigenvectors of the A'A
00490    eigenproblem are desired.  When called, ritvec() computes the 
00491    singular vectors of A and writes the result to an unformatted file.
00492 
00493 
00494    Parameters
00495    ----------
00496 
00497    (input)
00498    nrow       number of rows of A
00499    steps      number of Lanczos iterations performed
00500    fp_out2    pointer to unformatted output file
00501    n          dimension of matrix A
00502    kappa      relative accuracy of ritz values acceptable as 
00503                 eigenvalues of A'A
00504    ritz       array of ritz values
00505    bnd        array of error bounds
00506    alf        array of diagonal elements of the tridiagonal matrix T
00507    bet        array of off-diagonal elements of T
00508    w1, w2     work space
00509 
00510    (output)
00511    xv1        array of eigenvectors of A'A (right singular vectors of A)
00512    ierr       error code
00513               0 for normal return from imtql2()
00514               k if convergence did not occur for k-th eigenvalue in
00515                 imtql2()
00516    nsig       number of accepted ritz values based on kappa
00517 
00518    (local)
00519    s          work array which is initialized to the identity matrix
00520               of order (j + 1) upon calling imtql2().  After the call,
00521               s contains the orthonormal eigenvectors of the symmetric 
00522               tridiagonal matrix T
00523 
00524    Functions used
00525    --------------
00526 
00527    BLAS         svd_dscal, svd_dcopy, svd_daxpy
00528    USER         store
00529                 imtql2
00530 
00531  ***********************************************************************/
00532 
00533 void rotateArray(double *a, int size, int x) {
00534   int i, j, n, start;
00535   double t1, t2;
00536   if (x == 0) return;
00537   j = start = 0;
00538   t1 = a[0];
00539   for (i = 0; i < size; i++) {
00540     n = (j >= x) ? j - x : j + size - x;
00541     t2 = a[n];
00542     a[n] = t1;
00543     t1 = t2;
00544     j = n;
00545     if (j == start) {
00546       start = ++j;
00547       t1 = a[j];
00548     }
00549   }
00550 }
00551 
00552 long ritvec(long n, SMat A, SVDRec R, double kappa, double *ritz, double *bnd, 
00553             double *alf, double *bet, double *w2, long steps, long neig) {
00554   long js, jsq, i, k, /*size,*/ id2, tmp, nsig, x;
00555   double *s, *xv2, tmp0, tmp1, xnorm, *w1 = R->Vt->value[0];
00556   
00557   js = steps + 1;
00558   jsq = js * js;
00559   /*size = sizeof(double) * n;*/
00560   
00561   s = svd_doubleArray(jsq, TRUE, "ritvec: s");
00562   xv2 = svd_doubleArray(n, FALSE, "ritvec: xv2");
00563   
00564   /* initialize s to an identity matrix */
00565   for (i = 0; i < jsq; i+= (js+1)) s[i] = 1.0;
00566   svd_dcopy(js, alf, 1, w1, -1);
00567   svd_dcopy(steps, &bet[1], 1, &w2[1], -1);
00568   
00569   /* on return from imtql2(), w1 contains eigenvalues in ascending 
00570    * order and s contains the corresponding eigenvectors */
00571   imtql2(js, js, w1, w2, s);
00572   if (ierr) return 0;
00573   
00574   /*fwrite((char *)&n, sizeof(n), 1, fp_out2);
00575     fwrite((char *)&js, sizeof(js), 1, fp_out2);
00576     fwrite((char *)&kappa, sizeof(kappa), 1, fp_out2);*/
00577   /*id = 0;*/
00578   nsig = 0;
00579   x = 0;
00580   id2 = jsq - js;
00581   for (k = 0; k < js; k++) {
00582     tmp = id2;
00583     if (bnd[k] <= kappa * fabs(ritz[k]) && k > js-neig-1) {
00584       if (--x < 0) x = R->d - 1;
00585       w1 = R->Vt->value[x];
00586       for (i = 0; i < n; i++) w1[i] = 0.0;
00587       for (i = 0; i < js; i++) {
00588         store(n, RETRQ, i, w2);
00589         svd_daxpy(n, s[tmp], w2, 1, w1, 1);
00590         tmp -= js;
00591       }
00592       /*fwrite((char *)w1, size, 1, fp_out2);*/
00593       
00594       /* store the w1 vector row-wise in array xv1;   
00595        * size of xv1 is (steps+1) * (nrow+ncol) elements 
00596        * and each vector, even though only ncol long,
00597        * will have (nrow+ncol) elements in xv1.      
00598        * It is as if xv1 is a 2-d array (steps+1) by     
00599        * (nrow+ncol) and each vector occupies a row  */
00600 
00601       /* j is the index in the R arrays, which are sorted by high to low 
00602          singular values. */
00603         
00604       /*for (i = 0; i < n; i++) R->Vt->value[x]xv1[id++] = w1[i];*/
00605       /*id += nrow;*/
00606       nsig++;
00607     }
00608     id2++;
00609   }
00610   SAFE_FREE(s);
00611 
00612   /* Rotate the singular vectors and values. */
00613   /* x is now the location of the highest singular value. */
00614   rotateArray(R->Vt->value[0], R->Vt->rows * R->Vt->cols, 
00615               x * R->Vt->cols);
00616   R->d = svd_imin(R->d, nsig);
00617   for (x = 0; x < R->d; x++) {
00618     /* multiply by matrix B first */
00619     svd_opb(A, R->Vt->value[x], xv2, OPBTemp);
00620     tmp0 = svd_ddot(n, R->Vt->value[x], 1, xv2, 1);
00621     svd_daxpy(n, -tmp0, R->Vt->value[x], 1, xv2, 1);
00622     tmp0 = sqrt(tmp0);
00623     xnorm = sqrt(svd_ddot(n, xv2, 1, xv2, 1));
00624       
00625     /* multiply by matrix A to get (scaled) left s-vector */
00626     svd_opa(A, R->Vt->value[x], R->Ut->value[x]);
00627     tmp1 = 1.0 / tmp0;
00628     svd_dscal(A->rows, tmp1, R->Ut->value[x], 1);
00629     xnorm *= tmp1;
00630     bnd[i] = xnorm;
00631     R->S[x] = tmp0;
00632   }
00633 
00634   SAFE_FREE(xv2);
00635   return nsig;
00636 }
00637 
00638 /***********************************************************************
00639  *                                                                     *
00640  *                          lanso()                                    *
00641  *                                                                     *
00642  ***********************************************************************/
00643 /***********************************************************************
00644 
00645    Description
00646    -----------
00647 
00648    Function determines when the restart of the Lanczos algorithm should 
00649    occur and when it should terminate.
00650 
00651    Arguments 
00652    ---------
00653 
00654    (input)
00655    n         dimension of the eigenproblem for matrix B
00656    iterations    upper limit of desired number of lanczos steps           
00657    dimensions    upper limit of desired number of eigenpairs             
00658    endl      left end of interval containing unwanted eigenvalues
00659    endr      right end of interval containing unwanted eigenvalues
00660    ritz      array to hold the ritz values                       
00661    bnd       array to hold the error bounds                          
00662    wptr      array of pointers that point to work space:            
00663                wptr[0]-wptr[5]  six vectors of length n         
00664                wptr[6] array to hold diagonal of the tridiagonal matrix T
00665                wptr[9] array to hold off-diagonal of T  
00666                wptr[7] orthogonality estimate of Lanczos vectors at 
00667                  step j
00668                wptr[8] orthogonality estimate of Lanczos vectors at 
00669                  step j-1
00670 
00671    (output)
00672    j         number of Lanczos steps actually taken
00673    neig      number of ritz values stabilized
00674    ritz      array to hold the ritz values
00675    bnd       array to hold the error bounds
00676    ierr      (globally declared) error flag
00677              ierr = 8192 if stpone() fails to find a starting vector
00678              ierr = k if convergence did not occur for k-th eigenvalue
00679                     in imtqlb()
00680              ierr = 0 otherwise
00681 
00682 
00683    Functions used
00684    --------------
00685 
00686    LAS          stpone, error_bound, lanczos_step
00687    MISC         svd_dsort2
00688    UTILITY      svd_imin, svd_imax
00689 
00690  ***********************************************************************/
00691 
00692 int lanso(SMat A, long iterations, long dimensions, double endl,
00693           double endr, double *ritz, double *bnd, double *wptr[], 
00694           long *neigp, long n) {
00695   double *alf, *eta, *oldeta, *bet, *wrk, rnm, tol;
00696   long ll, first, last, ENOUGH, id2, id3, i, l, neig, j = 0, intro = 0;
00697   
00698   alf = wptr[6];
00699   eta = wptr[7];
00700   oldeta = wptr[8];
00701   bet = wptr[9];
00702   wrk = wptr[5];
00703   
00704   /* take the first step */
00705   stpone(A, wptr, &rnm, &tol, n);
00706   if (!rnm || ierr) return 0;
00707   eta[0] = eps1;
00708   oldeta[0] = eps1;
00709   ll = 0;
00710   first = 1;
00711   last = svd_imin(dimensions + svd_imax(8, dimensions), iterations);
00712   ENOUGH = FALSE;
00713   /*id1 = 0;*/
00714   while (/*id1 < dimensions && */!ENOUGH) {
00715     if (rnm <= tol) rnm = 0.0;
00716     
00717     /* the actual lanczos loop */
00718     j = lanczos_step(A, first, last, wptr, alf, eta, oldeta, bet, &ll,
00719                      &ENOUGH, &rnm, &tol, n);
00720     if (ENOUGH) j = j - 1;
00721     else j = last - 1;
00722     first = j + 1;
00723     bet[j+1] = rnm;
00724     
00725     /* analyze T */
00726     l = 0;
00727     for (id2 = 0; id2 < j; id2++) {
00728       if (l > j) break;
00729       for (i = l; i <= j; i++) if (!bet[i+1]) break;
00730       if (i > j) i = j;
00731       
00732       /* now i is at the end of an unreduced submatrix */
00733       svd_dcopy(i-l+1, &alf[l],   1, &ritz[l],  -1);
00734       svd_dcopy(i-l,   &bet[l+1], 1, &wrk[l+1], -1);
00735       
00736       imtqlb(i-l+1, &ritz[l], &wrk[l], &bnd[l]);
00737       
00738       if (ierr) {
00739         svd_error("svdLAS2: imtqlb failed to converge (ierr = %ld)\n", ierr);
00740         svd_error("  l = %ld  i = %ld\n", l, i);
00741         for (id3 = l; id3 <= i; id3++) 
00742           svd_error("  %ld  %lg  %lg  %lg\n", 
00743                     id3, ritz[id3], wrk[id3], bnd[id3]);
00744       }
00745       for (id3 = l; id3 <= i; id3++) 
00746         bnd[id3] = rnm * fabs(bnd[id3]);
00747       l = i + 1;
00748     }
00749     
00750     /* sort eigenvalues into increasing order */
00751     svd_dsort2((j+1) / 2, j + 1, ritz, bnd);
00752 
00753     /*    for (i = 0; i < iterations; i++)
00754       printf("%f ", ritz[i]);
00755       printf("\n"); */
00756     
00757     /* massage error bounds for very close ritz values */
00758     neig = error_bound(&ENOUGH, endl, endr, ritz, bnd, j, tol);
00759     *neigp = neig;
00760     
00761     /* should we stop? */
00762     if (neig < dimensions) {
00763       if (!neig) {
00764         last = first + 9;
00765         intro = first;
00766       } else last = first + svd_imax(3, 1 + ((j - intro) * (dimensions-neig)) /
00767                                      neig);
00768       last = svd_imin(last, iterations);
00769     } else ENOUGH = TRUE;
00770     ENOUGH = ENOUGH || first >= iterations;
00771     /* id1++; */
00772     /* printf("id1=%d dimen=%d first=%d\n", id1, dimensions, first); */
00773   }
00774   store(n, STORQ, j, wptr[1]);
00775   return j;
00776 }
00777 
00778 
00779 /***********************************************************************
00780  *                                                                     *
00781  *                      lanczos_step()                                 *
00782  *                                                                     *
00783  ***********************************************************************/
00784 /***********************************************************************
00785 
00786    Description
00787    -----------
00788 
00789    Function embodies a single Lanczos step
00790 
00791    Arguments 
00792    ---------
00793 
00794    (input)
00795    n        dimension of the eigenproblem for matrix B
00796    first    start of index through loop                               
00797    last     end of index through loop                                
00798    wptr     array of pointers pointing to work space                
00799    alf      array to hold diagonal of the tridiagonal matrix T
00800    eta      orthogonality estimate of Lanczos vectors at step j   
00801    oldeta   orthogonality estimate of Lanczos vectors at step j-1
00802    bet      array to hold off-diagonal of T                     
00803    ll       number of intitial Lanczos vectors in local orthog. 
00804               (has value of 0, 1 or 2)                  
00805    enough   stop flag                   
00806 
00807    Functions used
00808    --------------
00809 
00810    BLAS         svd_ddot, svd_dscal, svd_daxpy, svd_datx, svd_dcopy
00811    USER         store
00812    LAS          purge, ortbnd, startv
00813    UTILITY      svd_imin, svd_imax
00814 
00815  ***********************************************************************/
00816 
00817 long lanczos_step(SMat A, long first, long last, double *wptr[],
00818                   double *alf, double *eta, double *oldeta,
00819                   double *bet, long *ll, long *enough, double *rnmp, 
00820                   double *tolp, long n) {
00821    double t, *mid, rnm = *rnmp, tol = *tolp, anorm;
00822    long i, j;
00823 
00824    for (j=first; j<last; j++) {
00825       mid     = wptr[2];
00826       wptr[2] = wptr[1];
00827       wptr[1] = mid;
00828       mid     = wptr[3];
00829       wptr[3] = wptr[4];
00830       wptr[4] = mid;
00831 
00832       store(n, STORQ, j-1, wptr[2]);
00833       if (j-1 < MAXLL) store(n, STORP, j-1, wptr[4]);
00834       bet[j] = rnm;
00835 
00836       /* restart if invariant subspace is found */
00837       if (!bet[j]) {
00838          rnm = startv(A, wptr, j, n);
00839          if (ierr) return j;
00840          if (!rnm) *enough = TRUE;
00841       }
00842       if (*enough) {
00843         /* added by Doug... */
00844         /* These lines fix a bug that occurs with low-rank matrices */
00845         mid     = wptr[2];
00846         wptr[2] = wptr[1];
00847         wptr[1] = mid;
00848         /* ...added by Doug */
00849         break;
00850       }
00851 
00852       /* take a lanczos step */
00853       t = 1.0 / rnm;
00854       svd_datx(n, t, wptr[0], 1, wptr[1], 1);
00855       svd_dscal(n, t, wptr[3], 1);
00856       svd_opb(A, wptr[3], wptr[0], OPBTemp);
00857       svd_daxpy(n, -rnm, wptr[2], 1, wptr[0], 1);
00858       alf[j] = svd_ddot(n, wptr[0], 1, wptr[3], 1);
00859       svd_daxpy(n, -alf[j], wptr[1], 1, wptr[0], 1);
00860 
00861       /* orthogonalize against initial lanczos vectors */
00862       if (j <= MAXLL && (fabs(alf[j-1]) > 4.0 * fabs(alf[j])))
00863          *ll = j;  
00864       for (i=0; i < svd_imin(*ll, j-1); i++) {
00865          store(n, RETRP, i, wptr[5]);
00866          t = svd_ddot(n, wptr[5], 1, wptr[0], 1);
00867          store(n, RETRQ, i, wptr[5]);
00868          svd_daxpy(n, -t, wptr[5], 1, wptr[0], 1);
00869          eta[i] = eps1;
00870          oldeta[i] = eps1;
00871       }
00872 
00873       /* extended local reorthogonalization */
00874       t = svd_ddot(n, wptr[0], 1, wptr[4], 1);
00875       svd_daxpy(n, -t, wptr[2], 1, wptr[0], 1);
00876       if (bet[j] > 0.0) bet[j] = bet[j] + t;
00877       t = svd_ddot(n, wptr[0], 1, wptr[3], 1);
00878       svd_daxpy(n, -t, wptr[1], 1, wptr[0], 1);
00879       alf[j] = alf[j] + t;
00880       svd_dcopy(n, wptr[0], 1, wptr[4], 1);
00881       rnm = sqrt(svd_ddot(n, wptr[0], 1, wptr[4], 1));
00882       anorm = bet[j] + fabs(alf[j]) + rnm;
00883       tol = reps * anorm;
00884 
00885       /* update the orthogonality bounds */
00886       ortbnd(alf, eta, oldeta, bet, j, rnm);
00887 
00888       /* restore the orthogonality state when needed */
00889       purge(n, *ll, wptr[0], wptr[1], wptr[4], wptr[3], wptr[5], eta, oldeta,
00890             j, &rnm, tol);
00891       if (rnm <= tol) rnm = 0.0;
00892    }
00893    *rnmp = rnm;
00894    *tolp = tol;
00895    return j;
00896 }
00897 
00898 /***********************************************************************
00899  *                                                                     *
00900  *                          ortbnd()                                   *
00901  *                                                                     *
00902  ***********************************************************************/
00903 /***********************************************************************
00904 
00905    Description
00906    -----------
00907 
00908    Funtion updates the eta recurrence
00909 
00910    Arguments 
00911    ---------
00912 
00913    (input)
00914    alf      array to hold diagonal of the tridiagonal matrix T         
00915    eta      orthogonality estimate of Lanczos vectors at step j        
00916    oldeta   orthogonality estimate of Lanczos vectors at step j-1     
00917    bet      array to hold off-diagonal of T                          
00918    n        dimension of the eigenproblem for matrix B              
00919    j        dimension of T                                        
00920    rnm      norm of the next residual vector                     
00921    eps1     roundoff estimate for dot product of two unit vectors
00922 
00923    (output)
00924    eta      orthogonality estimate of Lanczos vectors at step j+1     
00925    oldeta   orthogonality estimate of Lanczos vectors at step j        
00926 
00927 
00928    Functions used
00929    --------------
00930 
00931    BLAS         svd_dswap
00932 
00933  ***********************************************************************/
00934 
00935 void ortbnd(double *alf, double *eta, double *oldeta, double *bet, long step,
00936             double rnm) {
00937    long i;
00938    if (step < 1) return;
00939    if (rnm) {
00940       if (step > 1) {
00941          oldeta[0] = (bet[1] * eta[1] + (alf[0]-alf[step]) * eta[0] -
00942                       bet[step] * oldeta[0]) / rnm + eps1;
00943       }
00944       for (i=1; i<=step-2; i++) 
00945          oldeta[i] = (bet[i+1] * eta[i+1] + (alf[i]-alf[step]) * eta[i] +
00946                       bet[i] * eta[i-1] - bet[step] * oldeta[i])/rnm + eps1;
00947    }
00948    oldeta[step-1] = eps1;
00949    svd_dswap(step, oldeta, 1, eta, 1);  
00950    eta[step] = eps1;
00951    return;
00952 }
00953 
00954 /***********************************************************************
00955  *                                                                     *
00956  *                              purge()                                *
00957  *                                                                     *
00958  ***********************************************************************/
00959 /***********************************************************************
00960 
00961    Description
00962    -----------
00963 
00964    Function examines the state of orthogonality between the new Lanczos
00965    vector and the previous ones to decide whether re-orthogonalization 
00966    should be performed
00967 
00968 
00969    Arguments 
00970    ---------
00971 
00972    (input)
00973    n        dimension of the eigenproblem for matrix B                 
00974    ll       number of intitial Lanczos vectors in local orthog.       
00975    r        residual vector to become next Lanczos vector            
00976    q        current Lanczos vector                                 
00977    ra       previous Lanczos vector
00978    qa       previous Lanczos vector
00979    wrk      temporary vector to hold the previous Lanczos vector
00980    eta      state of orthogonality between r and prev. Lanczos vectors 
00981    oldeta   state of orthogonality between q and prev. Lanczos vectors
00982    j        current Lanczos step                                     
00983 
00984    (output)
00985    r        residual vector orthogonalized against previous Lanczos 
00986               vectors
00987    q        current Lanczos vector orthogonalized against previous ones
00988 
00989 
00990    Functions used
00991    --------------
00992 
00993    BLAS         svd_daxpy,  svd_dcopy,  svd_idamax,  svd_ddot
00994    USER         store
00995 
00996  ***********************************************************************/
00997 
00998 void purge(long n, long ll, double *r, double *q, double *ra,  
00999            double *qa, double *wrk, double *eta, double *oldeta, long step, 
01000            double *rnmp, double tol) {
01001   double t, tq, tr, reps1, rnm = *rnmp;
01002   long k, iteration, flag, i;
01003   
01004   if (step < ll+2) return; 
01005   
01006   k = svd_idamax(step - (ll+1), &eta[ll], 1) + ll;
01007   if (fabs(eta[k]) > reps) {
01008     reps1 = eps1 / reps;
01009     iteration = 0;
01010     flag = TRUE;
01011     while (iteration < 2 && flag) {
01012       if (rnm > tol) {
01013         
01014         /* bring in a lanczos vector t and orthogonalize both 
01015          * r and q against it */
01016         tq = 0.0;
01017         tr = 0.0;
01018         for (i = ll; i < step; i++) {
01019           store(n,  RETRQ,  i,  wrk);
01020           t   = -svd_ddot(n, qa, 1, wrk, 1);
01021           tq += fabs(t);
01022           svd_daxpy(n,  t,  wrk,  1,  q,  1);
01023           t   = -svd_ddot(n, ra, 1, wrk, 1);
01024           tr += fabs(t);
01025           svd_daxpy(n, t, wrk, 1, r, 1);
01026         }
01027         svd_dcopy(n, q, 1, qa, 1);
01028         t   = -svd_ddot(n, r, 1, qa, 1);
01029         tr += fabs(t);
01030         svd_daxpy(n, t, q, 1, r, 1);
01031         svd_dcopy(n, r, 1, ra, 1);
01032         rnm = sqrt(svd_ddot(n, ra, 1, r, 1));
01033         if (tq <= reps1 && tr <= reps1 * rnm) flag = FALSE;
01034       }
01035       iteration++;
01036     }
01037     for (i = ll; i <= step; i++) { 
01038       eta[i] = eps1;
01039       oldeta[i] = eps1;
01040     }
01041   }
01042   *rnmp = rnm;
01043   return;
01044 }
01045 
01046 
01047 /***********************************************************************
01048  *                                                                     *
01049  *                         stpone()                                    *
01050  *                                                                     *
01051  ***********************************************************************/
01052 /***********************************************************************
01053 
01054    Description
01055    -----------
01056 
01057    Function performs the first step of the Lanczos algorithm.  It also
01058    does a step of extended local re-orthogonalization.
01059 
01060    Arguments 
01061    ---------
01062 
01063    (input)
01064    n      dimension of the eigenproblem for matrix B
01065 
01066    (output)
01067    ierr   error flag
01068    wptr   array of pointers that point to work space that contains
01069             wptr[0]             r[j]
01070             wptr[1]             q[j]
01071             wptr[2]             q[j-1]
01072             wptr[3]             p
01073             wptr[4]             p[j-1]
01074             wptr[6]             diagonal elements of matrix T 
01075 
01076 
01077    Functions used
01078    --------------
01079 
01080    BLAS         svd_daxpy, svd_datx, svd_dcopy, svd_ddot, svd_dscal
01081    USER         store, opb
01082    LAS          startv
01083 
01084  ***********************************************************************/
01085 
01086 void stpone(SMat A, double *wrkptr[], double *rnmp, double *tolp, long n) {
01087    double t, *alf, rnm, anorm;
01088    alf = wrkptr[6];
01089 
01090    /* get initial vector; default is random */
01091    rnm = startv(A, wrkptr, 0, n);
01092    if (rnm == 0.0 || ierr != 0) return;
01093 
01094    /* normalize starting vector */
01095    t = 1.0 / rnm;
01096    svd_datx(n, t, wrkptr[0], 1, wrkptr[1], 1);
01097    svd_dscal(n, t, wrkptr[3], 1);
01098 
01099    /* take the first step */
01100    svd_opb(A, wrkptr[3], wrkptr[0], OPBTemp);
01101    alf[0] = svd_ddot(n, wrkptr[0], 1, wrkptr[3], 1);
01102    svd_daxpy(n, -alf[0], wrkptr[1], 1, wrkptr[0], 1);
01103    t = svd_ddot(n, wrkptr[0], 1, wrkptr[3], 1);
01104    svd_daxpy(n, -t, wrkptr[1], 1, wrkptr[0], 1);
01105    alf[0] += t;
01106    svd_dcopy(n, wrkptr[0], 1, wrkptr[4], 1);
01107    rnm = sqrt(svd_ddot(n, wrkptr[0], 1, wrkptr[4], 1));
01108    anorm = rnm + fabs(alf[0]);
01109    *rnmp = rnm;
01110    *tolp = reps * anorm;
01111 
01112    return;
01113 }
01114 
01115 /***********************************************************************
01116  *                                                                     *
01117  *                         startv()                                    *
01118  *                                                                     *
01119  ***********************************************************************/
01120 /***********************************************************************
01121 
01122    Description
01123    -----------
01124 
01125    Function delivers a starting vector in r and returns |r|; it returns 
01126    zero if the range is spanned, and ierr is non-zero if no starting 
01127    vector within range of operator can be found.
01128 
01129    Parameters 
01130    ---------
01131 
01132    (input)
01133    n      dimension of the eigenproblem matrix B
01134    wptr   array of pointers that point to work space
01135    j      starting index for a Lanczos run
01136    eps    machine epsilon (relative precision)
01137 
01138    (output)
01139    wptr   array of pointers that point to work space that contains
01140           r[j], q[j], q[j-1], p[j], p[j-1]
01141    ierr   error flag (nonzero if no starting vector can be found)
01142 
01143    Functions used
01144    --------------
01145 
01146    BLAS         svd_ddot, svd_dcopy, svd_daxpy
01147    USER         svd_opb, store
01148    MISC         random
01149 
01150  ***********************************************************************/
01151 
01152 double startv(SMat A, double *wptr[], long step, long n) {
01153    double rnm2, *r, t;
01154    long irand;
01155    long id, i;
01156 
01157    /* get initial vector; default is random */
01158    rnm2 = svd_ddot(n, wptr[0], 1, wptr[0], 1);
01159    irand = 918273 + step;
01160    r = wptr[0];
01161    for (id = 0; id < 3; id++) {
01162       if (id > 0 || step > 0 || rnm2 == 0) 
01163          for (i = 0; i < n; i++) r[i] = svd_random2(&irand);
01164       svd_dcopy(n, wptr[0], 1, wptr[3], 1);
01165 
01166       /* apply operator to put r in range (essential if m singular) */
01167       svd_opb(A, wptr[3], wptr[0], OPBTemp);
01168       svd_dcopy(n, wptr[0], 1, wptr[3], 1);
01169       rnm2 = svd_ddot(n, wptr[0], 1, wptr[3], 1);
01170       if (rnm2 > 0.0) break;
01171    }
01172 
01173    /* fatal error */
01174    if (rnm2 <= 0.0) {
01175       ierr = 8192;
01176       return(-1);
01177    }
01178    if (step > 0) {
01179       for (i = 0; i < step; i++) {
01180          store(n, RETRQ, i, wptr[5]);
01181          t = -svd_ddot(n, wptr[3], 1, wptr[5], 1);
01182          svd_daxpy(n, t, wptr[5], 1, wptr[0], 1);
01183       }
01184 
01185       /* make sure q[step] is orthogonal to q[step-1] */
01186       t = svd_ddot(n, wptr[4], 1, wptr[0], 1);
01187       svd_daxpy(n, -t, wptr[2], 1, wptr[0], 1);
01188       svd_dcopy(n, wptr[0], 1, wptr[3], 1);
01189       t = svd_ddot(n, wptr[3], 1, wptr[0], 1);
01190       if (t <= eps * rnm2) t = 0.0;
01191       rnm2 = t;
01192    }
01193    return(sqrt(rnm2));
01194 }
01195 
01196 /***********************************************************************
01197  *                                                                     *
01198  *                      error_bound()                                  *
01199  *                                                                     *
01200  ***********************************************************************/
01201 /***********************************************************************
01202 
01203    Description
01204    -----------
01205 
01206    Function massages error bounds for very close ritz values by placing 
01207    a gap between them.  The error bounds are then refined to reflect 
01208    this.
01209 
01210 
01211    Arguments 
01212    ---------
01213 
01214    (input)
01215    endl     left end of interval containing unwanted eigenvalues
01216    endr     right end of interval containing unwanted eigenvalues
01217    ritz     array to store the ritz values
01218    bnd      array to store the error bounds
01219    enough   stop flag
01220 
01221 
01222    Functions used
01223    --------------
01224 
01225    BLAS         svd_idamax
01226    UTILITY      svd_dmin
01227 
01228  ***********************************************************************/
01229 
01230 long error_bound(long *enough, double endl, double endr, 
01231                  double *ritz, double *bnd, long step, double tol) {
01232   long mid, i, neig;
01233   double gapl, gap;
01234   
01235   /* massage error bounds for very close ritz values */
01236   mid = svd_idamax(step + 1, bnd, 1);
01237 
01238   for (i=((step+1) + (step-1)) / 2; i >= mid + 1; i -= 1)
01239     if (fabs(ritz[i-1] - ritz[i]) < eps34 * fabs(ritz[i])) 
01240       if (bnd[i] > tol && bnd[i-1] > tol) {
01241         bnd[i-1] = sqrt(bnd[i] * bnd[i] + bnd[i-1] * bnd[i-1]);
01242         bnd[i] = 0.0;
01243       }
01244   
01245   
01246   for (i=((step+1) - (step-1)) / 2; i <= mid - 1; i +=1 ) 
01247     if (fabs(ritz[i+1] - ritz[i]) < eps34 * fabs(ritz[i])) 
01248       if (bnd[i] > tol && bnd[i+1] > tol) {
01249         bnd[i+1] = sqrt(bnd[i] * bnd[i] + bnd[i+1] * bnd[i+1]);
01250         bnd[i] = 0.0;
01251       }
01252   
01253   /* refine the error bounds */
01254   neig = 0;
01255   gapl = ritz[step] - ritz[0];
01256   for (i = 0; i <= step; i++) {
01257     gap = gapl;
01258     if (i < step) gapl = ritz[i+1] - ritz[i];
01259     gap = svd_dmin(gap, gapl);
01260     if (gap > bnd[i]) bnd[i] = bnd[i] * (bnd[i] / gap);
01261     if (bnd[i] <= 16.0 * eps * fabs(ritz[i])) {
01262       neig++;
01263       if (!*enough) *enough = endl < ritz[i] && ritz[i] < endr;
01264     }
01265   }   
01266   return neig;
01267 }
01268 
01269 /***********************************************************************
01270  *                                                                     *
01271  *                              imtqlb()                               *
01272  *                                                                     *
01273  ***********************************************************************/
01274 /***********************************************************************
01275 
01276    Description
01277    -----------
01278 
01279    imtqlb() is a translation of a Fortran version of the Algol
01280    procedure IMTQL1, Num. Math. 12, 377-383(1968) by Martin and 
01281    Wilkinson, as modified in Num. Math. 15, 450(1970) by Dubrulle.  
01282    Handbook for Auto. Comp., vol.II-Linear Algebra, 241-248(1971).  
01283    See also B. T. Smith et al, Eispack Guide, Lecture Notes in 
01284    Computer Science, Springer-Verlag, (1976).
01285 
01286    The function finds the eigenvalues of a symmetric tridiagonal
01287    matrix by the implicit QL method.
01288 
01289 
01290    Arguments 
01291    ---------
01292 
01293    (input)
01294    n      order of the symmetric tridiagonal matrix                   
01295    d      contains the diagonal elements of the input matrix           
01296    e      contains the subdiagonal elements of the input matrix in its
01297           last n-1 positions.  e[0] is arbitrary                     
01298 
01299    (output)
01300    d      contains the eigenvalues in ascending order.  if an error
01301             exit is made, the eigenvalues are correct and ordered for
01302             indices 0,1,...ierr, but may not be the smallest eigenvalues.
01303    e      has been destroyed.                                       
01304    ierr   set to zero for normal return, j if the j-th eigenvalue has
01305             not been determined after 30 iterations.                
01306 
01307    Functions used
01308    --------------
01309 
01310    UTILITY      svd_fsign
01311    MISC         svd_pythag
01312 
01313  ***********************************************************************/
01314 
01315 void imtqlb(long n, double d[], double e[], double bnd[])
01316 
01317 {
01318    long last, l, m, i, iteration;
01319 
01320    /* various flags */
01321    long exchange, convergence, underflow;       
01322 
01323    double b, test, g, r, s, c, p, f;
01324 
01325    if (n == 1) return;
01326    ierr = 0;
01327    bnd[0] = 1.0;
01328    last = n - 1;
01329    for (i = 1; i < n; i++) {
01330       bnd[i] = 0.0;
01331       e[i-1] = e[i];
01332    }
01333    e[last] = 0.0;
01334    for (l = 0; l < n; l++) {
01335       iteration = 0;
01336       while (iteration <= 30) {
01337          for (m = l; m < n; m++) {
01338             convergence = FALSE;
01339             if (m == last) break;
01340             else {
01341                test = fabs(d[m]) + fabs(d[m+1]);
01342                if (test + fabs(e[m]) == test) convergence = TRUE;
01343             }
01344             if (convergence) break;
01345          }
01346             p = d[l]; 
01347             f = bnd[l]; 
01348          if (m != l) {
01349             if (iteration == 30) {
01350                ierr = l;
01351                return;
01352             }
01353             iteration += 1;
01354             /*........ form shift ........*/
01355             g = (d[l+1] - p) / (2.0 * e[l]);
01356             r = svd_pythag(g, 1.0);
01357             g = d[m] - p + e[l] / (g + svd_fsign(r, g));
01358             s = 1.0;
01359             c = 1.0;
01360             p = 0.0;
01361             underflow = FALSE;
01362             i = m - 1;
01363             while (underflow == FALSE && i >= l) {
01364                f = s * e[i];
01365                b = c * e[i];
01366                r = svd_pythag(f, g);
01367                e[i+1] = r;
01368                if (r == 0.0) underflow = TRUE;
01369                else {
01370                   s = f / r;
01371                   c = g / r;
01372                   g = d[i+1] - p;
01373                   r = (d[i] - g) * s + 2.0 * c * b;
01374                   p = s * r;
01375                   d[i+1] = g + p;
01376                   g = c * r - b;
01377                   f = bnd[i+1];
01378                   bnd[i+1] = s * bnd[i] + c * f;
01379                   bnd[i] = c * bnd[i] - s * f;
01380                   i--;
01381                }
01382             }       /* end while (underflow != FALSE && i >= l) */
01383             /*........ recover from underflow .........*/
01384             if (underflow) {
01385                d[i+1] -= p;
01386                e[m] = 0.0;
01387             }
01388             else {
01389                d[l] -= p;
01390                e[l] = g;
01391                e[m] = 0.0;
01392             }
01393          }                                 /* end if (m != l) */
01394          else {
01395 
01396             /* order the eigenvalues */
01397             exchange = TRUE;
01398             if (l != 0) {
01399                i = l;
01400                while (i >= 1 && exchange == TRUE) {
01401                   if (p < d[i-1]) {
01402                      d[i] = d[i-1];
01403                      bnd[i] = bnd[i-1];
01404                      i--;
01405                   }
01406                   else exchange = FALSE;
01407                }
01408             }
01409             if (exchange) i = 0;
01410             d[i] = p;
01411             bnd[i] = f; 
01412             iteration = 31;
01413          }
01414       }                        /* end while (iteration <= 30) */
01415    }                               /* end for (l=0; l<n; l++) */
01416    return;
01417 }                                                 /* end main */
01418 
01419 /***********************************************************************
01420  *                                                                     *
01421  *                              imtql2()                               *
01422  *                                                                     *
01423  ***********************************************************************/
01424 /***********************************************************************
01425 
01426    Description
01427    -----------
01428 
01429    imtql2() is a translation of a Fortran version of the Algol
01430    procedure IMTQL2, Num. Math. 12, 377-383(1968) by Martin and 
01431    Wilkinson, as modified in Num. Math. 15, 450(1970) by Dubrulle.  
01432    Handbook for Auto. Comp., vol.II-Linear Algebra, 241-248(1971).  
01433    See also B. T. Smith et al, Eispack Guide, Lecture Notes in 
01434    Computer Science, Springer-Verlag, (1976).
01435 
01436    This function finds the eigenvalues and eigenvectors of a symmetric
01437    tridiagonal matrix by the implicit QL method.
01438 
01439 
01440    Arguments
01441    ---------
01442 
01443    (input)                                                             
01444    nm     row dimension of the symmetric tridiagonal matrix           
01445    n      order of the matrix                                        
01446    d      contains the diagonal elements of the input matrix        
01447    e      contains the subdiagonal elements of the input matrix in its
01448             last n-1 positions.  e[0] is arbitrary                   
01449    z      contains the identity matrix                              
01450                                                                    
01451    (output)                                                       
01452    d      contains the eigenvalues in ascending order.  if an error
01453             exit is made, the eigenvalues are correct but unordered for
01454             for indices 0,1,...,ierr.                              
01455    e      has been destroyed.                                     
01456    z      contains orthonormal eigenvectors of the symmetric   
01457             tridiagonal (or full) matrix.  if an error exit is made,
01458             z contains the eigenvectors associated with the stored 
01459           eigenvalues.                                  
01460    ierr   set to zero for normal return, j if the j-th eigenvalue has
01461             not been determined after 30 iterations.                
01462 
01463 
01464    Functions used
01465    --------------
01466    UTILITY      svd_fsign
01467    MISC         svd_pythag
01468 
01469  ***********************************************************************/
01470 
01471 void imtql2(long nm, long n, double d[], double e[], double z[])
01472 
01473 {
01474    long index, nnm, j, last, l, m, i, k, iteration, convergence, underflow;
01475    double b, test, g, r, s, c, p, f;
01476    if (n == 1) return;
01477    ierr = 0;
01478    last = n - 1;
01479    for (i = 1; i < n; i++) e[i-1] = e[i];
01480    e[last] = 0.0;
01481    nnm = n * nm;
01482    for (l = 0; l < n; l++) {
01483       iteration = 0;
01484 
01485       /* look for small sub-diagonal element */
01486       while (iteration <= 30) {
01487          for (m = l; m < n; m++) {
01488             convergence = FALSE;
01489             if (m == last) break;
01490             else {
01491                test = fabs(d[m]) + fabs(d[m+1]);
01492                if (test + fabs(e[m]) == test) convergence = TRUE;
01493             }
01494             if (convergence) break;
01495          }
01496          if (m != l) {
01497 
01498             /* set error -- no convergence to an eigenvalue after
01499              * 30 iterations. */     
01500             if (iteration == 30) {
01501                ierr = l;
01502                return;
01503             }
01504             p = d[l]; 
01505             iteration += 1;
01506 
01507             /* form shift */
01508             g = (d[l+1] - p) / (2.0 * e[l]);
01509             r = svd_pythag(g, 1.0);
01510             g = d[m] - p + e[l] / (g + svd_fsign(r, g));
01511             s = 1.0;
01512             c = 1.0;
01513             p = 0.0;
01514             underflow = FALSE;
01515             i = m - 1;
01516             while (underflow == FALSE && i >= l) {
01517                f = s * e[i];
01518                b = c * e[i];
01519                r = svd_pythag(f, g);
01520                e[i+1] = r;
01521                if (r == 0.0) underflow = TRUE;
01522                else {
01523                   s = f / r;
01524                   c = g / r;
01525                   g = d[i+1] - p;
01526                   r = (d[i] - g) * s + 2.0 * c * b;
01527                   p = s * r;
01528                   d[i+1] = g + p;
01529                   g = c * r - b;
01530 
01531                   /* form vector */
01532                   for (k = 0; k < nnm; k += n) {
01533                      index = k + i;
01534                      f = z[index+1];
01535                      z[index+1] = s * z[index] + c * f;
01536                      z[index] = c * z[index] - s * f;
01537                   } 
01538                   i--;
01539                }
01540             }   /* end while (underflow != FALSE && i >= l) */
01541             /*........ recover from underflow .........*/
01542             if (underflow) {
01543                d[i+1] -= p;
01544                e[m] = 0.0;
01545             }
01546             else {
01547                d[l] -= p;
01548                e[l] = g;
01549                e[m] = 0.0;
01550             }
01551          }
01552          else break;
01553       }         /*...... end while (iteration <= 30) .........*/
01554    }            /*...... end for (l=0; l<n; l++) .............*/
01555 
01556    /* order the eigenvalues */
01557    for (l = 1; l < n; l++) {
01558       i = l - 1;
01559       k = i;
01560       p = d[i];
01561       for (j = l; j < n; j++) {
01562          if (d[j] < p) {
01563             k = j;
01564             p = d[j];
01565          }
01566       }
01567       /* ...and corresponding eigenvectors */
01568       if (k != i) {
01569          d[k] = d[i];
01570          d[i] = p;
01571           for (j = 0; j < nnm; j += n) {
01572              p = z[j+i];
01573              z[j+i] = z[j+k];
01574              z[j+k] = p;
01575           }
01576       }   
01577    }
01578    return;
01579 }               /*...... end main ............................*/
01580 
01581 /***********************************************************************
01582  *                                                                     *
01583  *                              machar()                               *
01584  *                                                                     *
01585  ***********************************************************************/
01586 /***********************************************************************
01587 
01588    Description
01589    -----------
01590 
01591    This function is a partial translation of a Fortran-77 subroutine 
01592    written by W. J. Cody of Argonne National Laboratory.
01593    It dynamically determines the listed machine parameters of the
01594    floating-point arithmetic.  According to the documentation of
01595    the Fortran code, "the determination of the first three uses an
01596    extension of an algorithm due to M. Malcolm, ACM 15 (1972), 
01597    pp. 949-951, incorporating some, but not all, of the improvements
01598    suggested by M. Gentleman and S. Marovich, CACM 17 (1974), 
01599    pp. 276-277."  The complete Fortran version of this translation is
01600    documented in W. J. Cody, "Machar: a Subroutine to Dynamically 
01601    Determine Determine Machine Parameters," TOMS 14, December, 1988.
01602 
01603 
01604    Parameters reported 
01605    -------------------
01606 
01607    ibeta     the radix for the floating-point representation       
01608    it        the number of base ibeta digits in the floating-point
01609                significand                                       
01610    irnd      0 if floating-point addition chops               
01611              1 if floating-point addition rounds, but not in the 
01612                  ieee style                                     
01613              2 if floating-point addition rounds in the ieee style
01614              3 if floating-point addition chops, and there is    
01615                  partial underflow                              
01616              4 if floating-point addition rounds, but not in the
01617                  ieee style, and there is partial underflow    
01618              5 if floating-point addition rounds in the ieee style,
01619                  and there is partial underflow                   
01620    machep    the largest negative integer such that              
01621                  1.0+float(ibeta)**machep .ne. 1.0, except that 
01622                  machep is bounded below by  -(it+3)          
01623    negeps    the largest negative integer such that          
01624                  1.0-float(ibeta)**negeps .ne. 1.0, except that 
01625                  negeps is bounded below by  -(it+3)           
01626 
01627  ***********************************************************************/
01628 
01629 void machar(long *ibeta, long *it, long *irnd, long *machep, long *negep) {
01630 
01631   volatile double beta, betain, betah, a, b, ZERO, ONE, TWO, temp, tempa,
01632     temp1;
01633   long i, itemp;
01634   
01635   ONE = (double) 1;
01636   TWO = ONE + ONE;
01637   ZERO = ONE - ONE;
01638   
01639   a = ONE;
01640   temp1 = ONE;
01641   while (temp1 - ONE == ZERO) {
01642     a = a + a;
01643     temp = a + ONE;
01644     temp1 = temp - a;
01645     b += a; /* to prevent icc compiler error */
01646   }
01647   b = ONE;
01648   itemp = 0;
01649   while (itemp == 0) {
01650     b = b + b;
01651     temp = a + b;
01652     itemp = (long)(temp - a);
01653   }
01654   *ibeta = itemp;
01655   beta = (double) *ibeta;
01656   
01657   *it = 0;
01658   b = ONE;
01659   temp1 = ONE;
01660   while (temp1 - ONE == ZERO) {
01661     *it = *it + 1;
01662     b = b * beta;
01663     temp = b + ONE;
01664     temp1 = temp - b;
01665   }
01666   *irnd = 0; 
01667   betah = beta / TWO; 
01668   temp = a + betah;
01669   if (temp - a != ZERO) *irnd = 1;
01670   tempa = a + beta;
01671   temp = tempa + betah;
01672   if ((*irnd == 0) && (temp - tempa != ZERO)) *irnd = 2;
01673   
01674   *negep = *it + 3;
01675   betain = ONE / beta;
01676   a = ONE;
01677   for (i = 0; i < *negep; i++) a = a * betain;
01678   b = a;
01679   temp = ONE - a;
01680   while (temp-ONE == ZERO) {
01681     a = a * beta;
01682     *negep = *negep - 1;
01683     temp = ONE - a;
01684   }
01685   *negep = -(*negep);
01686   
01687   *machep = -(*it) - 3;
01688   a = b;
01689   temp = ONE + a;
01690   while (temp - ONE == ZERO) {
01691     a = a * beta;
01692     *machep = *machep + 1;
01693     temp = ONE + a;
01694   }
01695   eps = a;
01696   return;
01697 }
01698 
01699 /***********************************************************************
01700  *                                                                     *
01701  *                     store()                                         *
01702  *                                                                     *
01703  ***********************************************************************/
01704 /***********************************************************************
01705 
01706    Description
01707    -----------
01708 
01709    store() is a user-supplied function which, based on the input
01710    operation flag, stores to or retrieves from memory a vector.
01711 
01712 
01713    Arguments 
01714    ---------
01715 
01716    (input)
01717    n       length of vector to be stored or retrieved
01718    isw     operation flag:
01719              isw = 1 request to store j-th Lanczos vector q(j)
01720              isw = 2 request to retrieve j-th Lanczos vector q(j)
01721              isw = 3 request to store q(j) for j = 0 or 1
01722              isw = 4 request to retrieve q(j) for j = 0 or 1
01723    s       contains the vector to be stored for a "store" request 
01724 
01725    (output)
01726    s       contains the vector retrieved for a "retrieve" request 
01727 
01728    Functions used
01729    --------------
01730 
01731    BLAS         svd_dcopy
01732 
01733  ***********************************************************************/
01734 
01735 void store(long n, long isw, long j, double *s) {
01736   /* printf("called store %ld %ld\n", isw, j); */
01737   switch(isw) {
01738   case STORQ:
01739     if (!LanStore[j + MAXLL]) {
01740       if (!(LanStore[j + MAXLL] = svd_doubleArray(n, FALSE, "LanStore[j]")))
01741         svd_fatalError("svdLAS2: failed to allocate LanStore[%d]", j + MAXLL);
01742     }
01743     svd_dcopy(n, s, 1, LanStore[j + MAXLL], 1);
01744     break;
01745   case RETRQ:   
01746     if (!LanStore[j + MAXLL])
01747       svd_fatalError("svdLAS2: store (RETRQ) called on index %d (not allocated)", 
01748                      j + MAXLL);
01749     svd_dcopy(n, LanStore[j + MAXLL], 1, s, 1);
01750     break;
01751   case STORP:   
01752     if (j >= MAXLL) {
01753       svd_error("svdLAS2: store (STORP) called with j >= MAXLL");
01754       break;
01755     }
01756     if (!LanStore[j]) {
01757       if (!(LanStore[j] = svd_doubleArray(n, FALSE, "LanStore[j]")))
01758         svd_fatalError("svdLAS2: failed to allocate LanStore[%d]", j);
01759     }
01760     svd_dcopy(n, s, 1, LanStore[j], 1);
01761     break;
01762   case RETRP:   
01763     if (j >= MAXLL) {
01764       svd_error("svdLAS2: store (RETRP) called with j >= MAXLL");
01765       break;
01766     }
01767     if (!LanStore[j])
01768       svd_fatalError("svdLAS2: store (RETRP) called on index %d (not allocated)", 
01769                      j);
01770     svd_dcopy(n, LanStore[j], 1, s, 1);
01771     break;
01772   }
01773   return;
01774 }