1 *> \brief \b DLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix associated with the qd Array Z to high relative accuracy. Used by sbdsqr and sstegr.
3 * =========== DOCUMENTATION ===========
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
9 *> Download DLASQ2 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasq2.f">
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasq2.f">
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasq2.f">
21 * SUBROUTINE DLASQ2( N, Z, INFO )
23 * .. Scalar Arguments ..
26 * .. Array Arguments ..
27 * DOUBLE PRECISION Z( * )
36 *> DLASQ2 computes all the eigenvalues of the symmetric positive
37 *> definite tridiagonal matrix associated with the qd array Z to high
38 *> relative accuracy are computed to high relative accuracy, in the
39 *> absence of denormalization, underflow and overflow.
41 *> To see the relation of Z to the tridiagonal matrix, let L be a
42 *> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and
43 *> let U be an upper bidiagonal matrix with 1's above and diagonal
44 *> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the
45 *> symmetric tridiagonal to which it is similar.
47 *> Note : DLASQ2 defines a logical variable, IEEE, which is true
48 *> on machines which follow ieee-754 floating-point standard in their
49 *> handling of infinities and NaNs, and false otherwise. This variable
50 *> is passed to DLASQ3.
59 *> The number of rows and columns in the matrix. N >= 0.
64 *> Z is DOUBLE PRECISION array, dimension ( 4*N )
65 *> On entry Z holds the qd array. On exit, entries 1 to N hold
66 *> the eigenvalues in decreasing order, Z( 2*N+1 ) holds the
67 *> trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If
68 *> N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 )
69 *> holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of
70 *> shifts that failed.
76 *> = 0: successful exit
77 *> < 0: if the i-th argument is a scalar and had an illegal
78 *> value, then INFO = -i, if the i-th argument is an
79 *> array and the j-entry had an illegal value, then
81 *> > 0: the algorithm failed
82 *> = 1, a split was marked by a positive value in E
83 *> = 2, current block of Z not diagonalized after 100*N
84 *> iterations (in inner while loop). On exit Z holds
85 *> a qd array with the same eigenvalues as the given Z.
86 *> = 3, termination criterion of outer while loop not met
87 *> (program created more than N unreduced blocks)
93 *> \author Univ. of Tennessee
94 *> \author Univ. of California Berkeley
95 *> \author Univ. of Colorado Denver
98 *> \date September 2012
100 *> \ingroup auxOTHERcomputational
102 *> \par Further Details:
103 * =====================
107 *> Local Variables: I0:N0 defines a current unreduced segment of Z.
108 *> The shifts are accumulated in SIGMA. Iteration count is in ITER.
109 *> Ping-pong is controlled by PP (alternates between 0 and 1).
112 * =====================================================================
113 SUBROUTINE DLASQ2( N, Z, INFO )
115 * -- LAPACK computational routine (version 3.4.2) --
116 * -- LAPACK is a software package provided by Univ. of Tennessee, --
117 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
120 * .. Scalar Arguments ..
123 * .. Array Arguments ..
124 DOUBLE PRECISION Z( * )
127 * =====================================================================
130 DOUBLE PRECISION CBIAS
131 PARAMETER ( CBIAS = 1.50D0 )
132 DOUBLE PRECISION ZERO, HALF, ONE, TWO, FOUR, HUNDRD
133 PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0,
134 $ TWO = 2.0D0, FOUR = 4.0D0, HUNDRD = 100.0D0 )
136 * .. Local Scalars ..
138 INTEGER I0, I1, I4, IINFO, IPN4, ITER, IWHILA, IWHILB,
139 $ K, KMIN, N0, N1, NBIG, NDIV, NFAIL, PP, SPLT,
141 DOUBLE PRECISION D, DEE, DEEMIN, DESIG, DMIN, DMIN1, DMIN2, DN,
142 $ DN1, DN2, E, EMAX, EMIN, EPS, G, OLDEMN, QMAX,
143 $ QMIN, S, SAFMIN, SIGMA, T, TAU, TEMP, TOL,
144 $ TOL2, TRACE, ZMAX, TEMPE, TEMPQ
146 * .. External Subroutines ..
147 EXTERNAL DLASQ3, DLASRT, XERBLA
149 * .. External Functions ..
151 DOUBLE PRECISION DLAMCH
152 EXTERNAL DLAMCH, ILAENV
154 * .. Intrinsic Functions ..
155 INTRINSIC ABS, DBLE, MAX, MIN, SQRT
157 * .. Executable Statements ..
159 * Test the input arguments.
160 * (in case DLASQ2 is not called by DLASQ1)
163 EPS = DLAMCH( 'Precision' )
164 SAFMIN = DLAMCH( 'Safe minimum' )
170 CALL XERBLA( 'DLASQ2', 1 )
172 ELSE IF( N.EQ.0 ) THEN
174 ELSE IF( N.EQ.1 ) THEN
178 IF( Z( 1 ).LT.ZERO ) THEN
180 CALL XERBLA( 'DLASQ2', 2 )
183 ELSE IF( N.EQ.2 ) THEN
187 IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN
189 CALL XERBLA( 'DLASQ2', 2 )
191 ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN
196 Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 )
197 IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN
198 T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) )
199 S = Z( 3 )*( Z( 2 ) / T )
201 S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) )
203 S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) )
205 T = Z( 1 ) + ( S+Z( 2 ) )
206 Z( 3 ) = Z( 3 )*( Z( 1 ) / T )
210 Z( 6 ) = Z( 2 ) + Z( 1 )
214 * Check for negative data and compute sums of q's and e's.
223 DO 10 K = 1, 2*( N-1 ), 2
224 IF( Z( K ).LT.ZERO ) THEN
226 CALL XERBLA( 'DLASQ2', 2 )
228 ELSE IF( Z( K+1 ).LT.ZERO ) THEN
230 CALL XERBLA( 'DLASQ2', 2 )
235 QMAX = MAX( QMAX, Z( K ) )
236 EMIN = MIN( EMIN, Z( K+1 ) )
237 ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) )
239 IF( Z( 2*N-1 ).LT.ZERO ) THEN
240 INFO = -( 200+2*N-1 )
241 CALL XERBLA( 'DLASQ2', 2 )
245 QMAX = MAX( QMAX, Z( 2*N-1 ) )
246 ZMAX = MAX( QMAX, ZMAX )
248 * Check for diagonality.
254 CALL DLASRT( 'D', N, Z, IINFO )
261 * Check for zero data.
263 IF( TRACE.EQ.ZERO ) THEN
268 * Check whether the machine is IEEE conformable.
270 IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND.
271 $ ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1
273 * Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...).
279 Z( 2*K-3 ) = Z( K-1 )
285 * Reverse the qd-array, if warranted.
287 IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN
289 DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4
292 Z( I4-3 ) = Z( IPN4-I4-3 )
293 Z( IPN4-I4-3 ) = TEMP
295 Z( I4-1 ) = Z( IPN4-I4-5 )
296 Z( IPN4-I4-5 ) = TEMP
300 * Initial split checking via dqd and Li's test.
307 DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4
308 IF( Z( I4-1 ).LE.TOL2*D ) THEN
312 D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) )
316 * dqd maps Z to ZZ plus Li's test.
318 EMIN = Z( 4*I0+PP+1 )
320 DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4
321 Z( I4-2*PP-2 ) = D + Z( I4-1 )
322 IF( Z( I4-1 ).LE.TOL2*D ) THEN
327 ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND.
328 $ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN
329 TEMP = Z( I4+1 ) / Z( I4-2*PP-2 )
330 Z( I4-2*PP ) = Z( I4-1 )*TEMP
333 Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) )
334 D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) )
336 EMIN = MIN( EMIN, Z( I4-2*PP ) )
342 QMAX = Z( 4*I0-PP-2 )
343 DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4
344 QMAX = MAX( QMAX, Z( I4 ) )
347 * Prepare for the next iteration on K.
353 * Initialise variables to pass to DLASQ3.
368 DO 160 IWHILA = 1, N + 1
374 * While array unfinished do
376 * E(N0) holds the value of SIGMA when submatrix in I0:N0
377 * splits from the rest of the array, but is negated.
385 IF( SIGMA.LT.ZERO ) THEN
390 * Find last unreduced submatrix's top index I0, find QMAX and
391 * EMIN. Find Gershgorin-type bound if Q's much greater than E's.
395 EMIN = ABS( Z( 4*N0-5 ) )
401 DO 90 I4 = 4*N0, 8, -4
402 IF( Z( I4-5 ).LE.ZERO )
404 IF( QMIN.GE.FOUR*EMAX ) THEN
405 QMIN = MIN( QMIN, Z( I4-3 ) )
406 EMAX = MAX( EMAX, Z( I4-5 ) )
408 QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) )
409 EMIN = MIN( EMIN, Z( I4-5 ) )
418 IF( N0-I0.GT.1 ) THEN
422 DO 110 I4 = 4*I0+1, 4*N0-3, 4
423 DEE = Z( I4 )*( DEE /( DEE+Z( I4-2 ) ) )
424 IF( DEE.LE.DEEMIN ) THEN
429 IF( (KMIN-I0)*2.LT.N0-KMIN .AND.
430 $ DEEMIN.LE.HALF*Z(4*N0-3) ) THEN
433 DO 120 I4 = 4*I0, 2*( I0+N0-1 ), 4
435 Z( I4-3 ) = Z( IPN4-I4-3 )
436 Z( IPN4-I4-3 ) = TEMP
438 Z( I4-2 ) = Z( IPN4-I4-2 )
439 Z( IPN4-I4-2 ) = TEMP
441 Z( I4-1 ) = Z( IPN4-I4-5 )
442 Z( IPN4-I4-5 ) = TEMP
444 Z( I4 ) = Z( IPN4-I4-4 )
445 Z( IPN4-I4-4 ) = TEMP
450 * Put -(initial shift) into DMIN.
452 DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) )
454 * Now I0:N0 is unreduced.
455 * PP = 0 for ping, PP = 1 for pong.
456 * PP = 2 indicates that flipping was applied to the Z array and
457 * and that the tests for deflation upon entry in DLASQ3
458 * should not be performed.
460 NBIG = 100*( N0-I0+1 )
461 DO 140 IWHILB = 1, NBIG
467 ! Print out test cases
470 write(3,*) "i0: ", I0, ","
471 write(3,*) "n0: ", N0, ","
472 write(3,'(9999(g0))',advance="no") "z: []float64{"
474 write (3,'(99999(e24.16,a))',advance="no") z(i), ","
477 write (3,*) "pp: ", PP, ","
478 write (3,*) "dmin: ", DMIN, ","
479 write (3,*) "desig:", DESIG, ","
480 write (3,*) "qmax: ", QMAX, ","
481 write (3,*) "ttype:", TTYPE, ","
482 write (3,*) "dmin1:", DMIN1, ","
483 write (3,*) "dmin2:", DMIN2, ","
484 write (3,*) "dn: ", DN, ","
485 write (3,*) "dn1: ", DN1, ","
486 write (3,*) "dn2: ", DN2, ","
487 write (3,*) "g: ", G, ","
488 write (3,*) "tau: ", TAU, ","
489 write (3,*) "nFail:", NFAIL, ","
490 write (3,*) "iter: ", ITER, ","
491 write (3,*) "sigma:", SIGMA, ","
492 write (3,*) "nDiv: ", NDIV, ","
494 * While submatrix unfinished take a good dqds step.
498 CALL DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
499 $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1,
504 write(3,'(9999(g0))',advance="no") "zOut: []float64{"
506 write (3,'(99999(e24.16,a))',advance="no") z(i), ","
509 write (3,*) "i0Out:",I0, ","
510 write (3,*) "n0Out:", N0, ","
511 write (3,*) "ppOut:", PP, ","
512 write (3,*) "dminOut:", DMIN, ","
513 write (3,*) "desigOut:", DESIG, ","
514 write (3,*) "sigmaOut:", SIGMA, ","
515 write (3,*) "qmaxOut:", QMAX, ","
516 write (3,*) "nFailOut:", NFAIL, ","
517 write (3,*) "iterOut:", ITER, ","
518 write (3,*) "nDivOut:", NDIV, ","
519 write (3,*) "ttypeOut:", TTYPE, ","
520 write (3,*) "dmin1Out:", DMIN1, ","
521 write (3,*) "dmin2Out:", DMIN2, ","
522 write (3,*) "dnOut:", DN, ","
523 write (3,*) "dn1Out:", DN1, ","
524 write (3,*) "dn2Out:", DN2, ","
525 write (3,*) "gOut:", G, ","
526 write (3,*) "tauOut:", TAU, ","
533 * When EMIN is very small check for splits.
535 IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN
536 IF( Z( 4*N0 ).LE.TOL2*QMAX .OR.
537 $ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN
542 DO 130 I4 = 4*I0, 4*( N0-3 ), 4
543 IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR.
544 $ Z( I4-1 ).LE.TOL2*SIGMA ) THEN
551 QMAX = MAX( QMAX, Z( I4+1 ) )
552 EMIN = MIN( EMIN, Z( I4-1 ) )
553 OLDEMN = MIN( OLDEMN, Z( I4 ) )
566 * Maximum number of iterations exceeded, restore the shift
567 * SIGMA and place the new d's and e's in a qd array.
568 * This might need to be done for several blocks
575 Z( 4*I0-3 ) = Z( 4*I0-3 ) + SIGMA
578 Z( 4*K-5 ) = Z( 4*K-5 ) * (TEMPQ / Z( 4*K-7 ))
580 Z( 4*K-3 ) = Z( 4*K-3 ) + SIGMA + TEMPE - Z( 4*K-5 )
583 * Prepare to do this on the previous block if there is one
587 DO WHILE( ( I1.GE.2 ) .AND. ( Z(4*I1-5).GE.ZERO ) )
595 Z( 2*K-1 ) = Z( 4*K-3 )
597 * Only the block 1..N0 is unfinished. The rest of the e's
598 * must be essentially zero, although sometimes other data
599 * has been stored in them.
602 Z( 2*K ) = Z( 4*K-1 )
623 * Move q's to the front.
629 * Sort and compute sum of eigenvalues.
631 CALL DLASRT( 'D', N, Z, IINFO )
639 * Store trace, sum(eigenvalues) and information on performance.
644 Z( 2*N+3 ) = DBLE( ITER )
645 Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 )
646 Z( 2*N+5 ) = HUNDRD*NFAIL / DBLE( ITER )