Newer
Older
/* lsqr.c
This C version of LSQR was first created by
Michael P Friedlander <mpf@cs.ubc.ca>
as part of his BCLS package:
http://www.cs.ubc.ca/~mpf/bcls/index.html.
The present file is maintained by
Michael Saunders <saunders@stanford.edu>
31 Aug 2007: First version of this file lsqr.c obtained from
Michael Friedlander's BCLS package, svn version number
$Revision: 273 $ $Date: 2006-09-04 15:59:04 -0700 (Mon, 04 Sep 2006) $
The stopping rules were slightly altered in that version.
They have been restored to the original rules used in the f77 LSQR.
Parallelized for ESA Gaia Mission. U. becciani A. Vecchiato 2013
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
#include "util.h"
#include <limits.h>
//#ifdef __APPLE__
// #include <vecLib/vecLib.h>
//#else
// #include "cblas.h"
//#endif
#define ZERO 0.0
#define ONE 1.0
void aprod(int mode, long int m, long int n, double x[], double y[],
double *ra,long int *matrixIndex, int *instrCol,int *instrConstrIlung, struct comData comlsqr,time_t *ompSec);
// ---------------------------------------------------------------------
// d2norm returns sqrt( a**2 + b**2 ) with precautions
// to avoid overflow.
//
// 21 Mar 1990: First version.
// ---------------------------------------------------------------------
static double
d2norm( const double a, const double b )
scale = fabs( a ) + fabs( b );
return scale * sqrt( (a/scale)*(a/scale) + (b/scale)*(b/scale) );
dload( const long int n, const double alpha, double x[] )
{
#pragma omp for
for (i = 0; i < n; i++) x[i] = alpha;
return;
}
// ---------------------------------------------------------------------
// LSQR
// ---------------------------------------------------------------------
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
void lsqr(
long int m,
long int n,
// void (*aprod)(int mode, int m, int n, double x[], double y[],
// void *UsrWrk),
double damp,
// void *UsrWrk,
double *knownTerms, // len = m reported as u
double *vVect, // len = n reported as v
double *wVect, // len = n reported as w
double *xSolution, // len = n reported as x
double *standardError, // len at least n. May be NULL. reported as se
double atol,
double btol,
double conlim,
int itnlim,
// The remaining variables are output only.
int *istop_out,
int *itn_out,
double *anorm_out,
double *acond_out,
double *rnorm_out,
double *arnorm_out,
double *xnorm_out,
double *systemMatrix, // reported as a
long int *matrixIndex, // reported as janew
int *instrCol,
int *instrConstrIlung,
double *preCondVect,
struct comData comlsqr)
{
// ------------------------------------------------------------------
//
// LSQR finds a solution x to the following problems:
//
// 1. Unsymmetric equations -- solve A*x = b
//
// 2. Linear least squares -- solve A*x = b
// in the least-squares sense
//
// 3. Damped least squares -- solve ( A )*x = ( b )
// ( damp*I ) ( 0 )
// in the least-squares sense
//
// where A is a matrix with m rows and n columns, b is an
// m-vector, and damp is a scalar. (All quantities are real.)
// The matrix A is intended to be large and sparse. It is accessed
// by means of subroutine calls of the form
//
// aprod ( mode, m, n, x, y, UsrWrk )
//
// which must perform the following functions:
//
// If mode = 1, compute y = y + A*x.
// If mode = 2, compute x = x + A(transpose)*y.
//
// The vectors x and y are input parameters in both cases.
// If mode = 1, y should be altered without changing x.
// If mode = 2, x should be altered without changing y.
// The parameter UsrWrk may be used for workspace as described
// below.
//
// The rhs vector b is input via u, and subsequently overwritten.
//
//
// Note: LSQR uses an iterative method to approximate the solution.
// The number of iterations required to reach a certain accuracy
// depends strongly on the scaling of the problem. Poor scaling of
// the rows or columns of A should therefore be avoided where
// possible.
//
// For example, in problem 1 the solution is unaltered by
// row-scaling. If a row of A is very small or large compared to
// the other rows of A, the corresponding row of ( A b ) should be
// scaled up or down.
//
// In problems 1 and 2, the solution x is easily recovered
// following column-scaling. Unless better information is known,
// the nonzero columns of A should be scaled so that they all have
// the same Euclidean norm (e.g., 1.0).
//
// In problem 3, there is no freedom to re-scale if damp is
// nonzero. However, the value of damp should be assigned only
// after attention has been paid to the scaling of A.
//
// The parameter damp is intended to help regularize
// ill-conditioned systems, by preventing the true solution from
// being very large. Another aid to regularization is provided by
// the parameter acond, which may be used to terminate iterations
// before the computed solution becomes very large.
//
// Note that x is not an input parameter.
// If some initial estimate x0 is known and if damp = 0,
// one could proceed as follows:
//
// 1. Compute a residual vector r0 = b - A*x0.
// 2. Use LSQR to solve the system A*dx = r0.
// 3. Add the correction dx to obtain a final solution x = x0 + dx.
//
// This requires that x0 be available before and after the call
// to LSQR. To judge the benefits, suppose LSQR takes k1 iterations
// to solve A*x = b and k2 iterations to solve A*dx = r0.
// If x0 is "good", norm(r0) will be smaller than norm(b).
// If the same stopping tolerances atol and btol are used for each
// system, k1 and k2 will be similar, but the final solution x0 + dx
// should be more accurate. The only way to reduce the total work
// is to use a larger stopping tolerance for the second system.
// If some value btol is suitable for A*x = b, the larger value
// btol*norm(b)/norm(r0) should be suitable for A*dx = r0.
//
// Preconditioning is another way to reduce the number of iterations.
// If it is possible to solve a related system M*x = b efficiently,
// where M approximates A in some helpful way
// (e.g. M - A has low rank or its elements are small relative to
// those of A), LSQR may converge more rapidly on the system
// A*M(inverse)*z = b,
// after which x can be recovered by solving M*x = z.
//
// NOTE: If A is symmetric, LSQR should not be used!
// Alternatives are the symmetric conjugate-gradient method (cg)
// and/or SYMMLQ.
// SYMMLQ is an implementation of symmetric cg that applies to
// any symmetric A and will converge more rapidly than LSQR.
// If A is positive definite, there are other implementations of
// symmetric cg that require slightly less work per iteration
// than SYMMLQ (but will take the same number of iterations).
//
//
// Notation
// --------
//
Loading full blame...