|
SRC\pdlaed3.f |
|
| #lines: 360 size: 11 Kb creation: 18/01/2006 23:36:04 last modification: 08/05/2008 18:37:53 attribute: ARCH Find Reload | |
1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285: 286: 287: 288: 289: 290: 291: 292: 293: 294: 295: 296: 297: 298: 299: 300: 301: 302: 303: 304: 305: 306: 307: 308: 309: 310: 311: 312: 313: 314: 315: 316: 317: 318: 319: 320: 321: 322: 323: 324: 325: 326: 327: 328: 329: 330: 331: 332: 333: 334: 335: 336: 337: 338: 339: 340: 341: 342: 343: 344: 345: 346: 347: 348: 349: 350: 351: 352: 353: 354: 355: 356: 357: 358: 359: 360: |
SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA,
$ W, Z, U, LDU, BUF, INDX, INDCOL, INDROW,
$ INDXR, INDXC, CTOT, NPCOL, INFO )
*
* -- ScaLAPACK auxiliary routine (version 1.7) --
* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
* and University of California, Berkeley.
* December 31, 1998
*
* .. Scalar Arguments ..
INTEGER DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL
DOUBLE PRECISION RHO
* ..
* .. Array Arguments ..
INTEGER CTOT( 0: NPCOL-1, 4 ), INDCOL( * ),
$ INDROW( * ), INDX( * ), INDXC( * ), INDXR( * )
DOUBLE PRECISION BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ),
$ W( * ), Z( * )
* ..
*
* Purpose
* =======
*
* PDLAED3 finds the roots of the secular equation, as defined by the
* values in D, W, and RHO, between 1 and K. It makes the
* appropriate calls to SLAED4
*
* This code makes very mild assumptions about floating point
* arithmetic. It will work on machines with a guard digit in
* add/subtract, or on those binary machines without guard digits
* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
* It could conceivably fail on hexadecimal or decimal machines
* without guard digits, but we know of none.
*
* Arguments
* =========
*
* ICTXT (global input) INTEGER
* The BLACS context handle, indicating the global context of
* the operation on the matrix. The context itself is global.
*
* K (output) INTEGER
* The number of non-deflated eigenvalues, and the order of the
* related secular equation. 0 <= K <=N.
*
* N (input) INTEGER
* The dimension of the symmetric tridiagonal matrix. N >= 0.
*
* NB (global input) INTEGER
* The blocking factor used to distribute the columns of the
* matrix. NB >= 1.
*
* D (input/output) DOUBLE PRECISION array, dimension (N)
* On entry, D contains the eigenvalues of the two submatrices to
* be combined.
* On exit, D contains the trailing (N-K) updated eigenvalues
* (those which were deflated) sorted into increasing order.
*
* DROW (global input) INTEGER
* The process row over which the first row of the matrix D is
* distributed. 0 <= DROW < NPROW.
*
* DCOL (global input) INTEGER
* The process column over which the first column of the
* matrix D is distributed. 0 <= DCOL < NPCOL.
*
* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
* On entry, Q contains the eigenvectors of two submatrices in
* the two square blocks with corners at (1,1), (N1,N1)
* and (N1+1, N1+1), (N,N).
* On exit, Q contains the trailing (N-K) updated eigenvectors
* (those which were deflated) in its last N-K columns.
*
* LDQ (input) INTEGER
* The leading dimension of the array Q. LDQ >= max(1,NQ).
*
* RHO (global input/output) DOUBLE PRECISION
* On entry, the off-diagonal element associated with the rank-1
* cut which originally split the two submatrices which are now
* being recombined.
* On exit, RHO has been modified to the value required by
* PDLAED3.
*
* DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
* A copy of the first K eigenvalues which will be used by
* SLAED3 to form the secular equation.
*
* W (global output) DOUBLE PRECISION array, dimension (N)
* The first k values of the final deflation-altered z-vector
* which will be passed to SLAED3.
*
* Z (global input) DOUBLE PRECISION array, dimension (N)
* On entry, Z contains the updating vector (the last
* row of the first sub-eigenvector matrix and the first row of
* the second sub-eigenvector matrix).
* On exit, the contents of Z have been destroyed by the updating
* process.
*
* U (global output) DOUBLE PRECISION array
* global dimension (N, N), local dimension (LDU, NQ).
* Q contains the orthonormal eigenvectors of the symmetric
* tridiagonal matrix.
*
* LDU (input) INTEGER
* The leading dimension of the array U.
*
* QBUF (workspace) DOUBLE PRECISION array, dimension 3*N
*
*
* INDX (workspace) INTEGER array, dimension (N)
* The permutation used to sort the contents of DLAMDA into
* ascending order.
*
* INDCOL (workspace) INTEGER array, dimension (N)
*
*
* INDROW (workspace) INTEGER array, dimension (N)
*
*
* INDXR (workspace) INTEGER array, dimension (N)
*
*
* INDXC (workspace) INTEGER array, dimension (N)
*
* CTOT (workspace) INTEGER array, dimension( NPCOL, 4)
*
* NPCOL (global input) INTEGER
* The total number of columns over which the distributed
* submatrix is distributed.
*
* INFO (output) INTEGER
* = 0: successful exit.
* < 0: if INFO = -i, the i-th argument had an illegal value.
* > 0: The algorithm failed to compute the ith eigenvalue.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU,
$ KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW,
$ NPROW, PDC, PDR, ROW
DOUBLE PRECISION AUX, TEMP
* ..
* .. External Functions ..
INTEGER INDXG2L
DOUBLE PRECISION DLAMC3, DNRM2
EXTERNAL INDXG2L, DLAMC3, DNRM2
* ..
* .. External Subroutines ..
EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D,
$ DGERV2D, DGESD2D, DLAED4
* ..
* .. Intrinsic Functions ..
INTRINSIC MOD, SIGN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
*
* Quick return if possible
*
IF( K.EQ.0 )
$ RETURN
*
CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
ROW = DROW
COL = DCOL
DO 20 I = 1, N, NB
DO 10 J = 0, NB - 1
IF( I+J.LE.N ) THEN
INDROW( I+J ) = ROW
INDCOL( I+J ) = COL
END IF
10 CONTINUE
ROW = MOD( ROW+1, NPROW )
COL = MOD( COL+1, NPCOL )
20 CONTINUE
*
MYKL = CTOT( MYCOL, 1 ) + CTOT( MYCOL, 2 ) + CTOT( MYCOL, 3 )
KLR = MYKL / NPROW
IF( MYROW.EQ.DROW ) THEN
MYKLR = KLR + MOD( MYKL, NPROW )
ELSE
MYKLR = KLR
END IF
PDC = 1
COL = DCOL
30 CONTINUE
IF( MYCOL.NE.COL ) THEN
PDC = PDC + CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 )
COL = MOD( COL+1, NPCOL )
GO TO 30
END IF
PDR = PDC
KL = KLR + MOD( MYKL, NPROW )
ROW = DROW
40 CONTINUE
IF( MYROW.NE.ROW ) THEN
PDR = PDR + KL
KL = KLR
ROW = MOD( ROW+1, NPROW )
GO TO 40
END IF
*
DO 50 I = 1, K
DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
Z( I ) = ONE
50 CONTINUE
IF( MYKLR.GT.0 ) THEN
KK = PDR
DO 80 I = 1, MYKLR
CALL DLAED4( K, KK, DLAMDA, W, BUF, RHO, BUF( K+I ), IINFO )
IF( IINFO.NE.0 ) THEN
INFO = KK
END IF
*
* ..Compute part of z
*
DO 60 J = 1, KK - 1
Z( J ) = Z( J )*( BUF( J ) /
$ ( DLAMDA( J )-DLAMDA( KK ) ) )
60 CONTINUE
Z( KK ) = Z( KK )*BUF( KK )
DO 70 J = KK + 1, K
Z( J ) = Z( J )*( BUF( J ) /
$ ( DLAMDA( J )-DLAMDA( KK ) ) )
70 CONTINUE
KK = KK + 1
80 CONTINUE
*
IF( MYROW.NE.DROW ) THEN
CALL DCOPY( K, Z, 1, BUF, 1 )
CALL DGESD2D( ICTXT, K+MYKLR, 1, BUF, K+MYKLR, DROW, MYCOL )
ELSE
IPD = 2*K + 1
CALL DCOPY( MYKLR, BUF( K+1 ), 1, BUF( IPD ), 1 )
IF( KLR.GT.0 ) THEN
IPD = MYKLR + IPD
ROW = MOD( DROW+1, NPROW )
DO 100 I = 1, NPROW - 1
CALL DGERV2D( ICTXT, K+KLR, 1, BUF, K+KLR, ROW,
$ MYCOL )
CALL DCOPY( KLR, BUF( K+1 ), 1, BUF( IPD ), 1 )
DO 90 J = 1, K
Z( J ) = Z( J )*BUF( J )
90 CONTINUE
IPD = IPD + KLR
ROW = MOD( ROW+1, NPROW )
100 CONTINUE
END IF
END IF
END IF
*
IF( MYROW.EQ.DROW ) THEN
IF( MYCOL.NE.DCOL .AND. MYKL.NE.0 ) THEN
CALL DCOPY( K, Z, 1, BUF, 1 )
CALL DCOPY( MYKL, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 )
CALL DGESD2D( ICTXT, K+MYKL, 1, BUF, K+MYKL, MYROW, DCOL )
ELSE IF( MYCOL.EQ.DCOL ) THEN
IPD = 2*K + 1
COL = DCOL
KL = MYKL
DO 120 I = 1, NPCOL - 1
IPD = IPD + KL
COL = MOD( COL+1, NPCOL )
KL = CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 )
IF( KL.NE.0 ) THEN
CALL DGERV2D( ICTXT, K+KL, 1, BUF, K+KL, MYROW, COL )
CALL DCOPY( KL, BUF( K+1 ), 1, BUF( IPD ), 1 )
DO 110 J = 1, K
Z( J ) = Z( J )*BUF( J )
110 CONTINUE
END IF
120 CONTINUE
DO 130 I = 1, K
Z( I ) = SIGN( SQRT( -Z( I ) ), W( I ) )
130 CONTINUE
*
END IF
END IF
*
* Diffusion
*
IF( MYROW.EQ.DROW .AND. MYCOL.EQ.DCOL ) THEN
CALL DCOPY( K, Z, 1, BUF, 1 )
CALL DCOPY( K, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 )
CALL DGEBS2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K )
ELSE
CALL DGEBR2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K, DROW, DCOL )
CALL DCOPY( K, BUF, 1, Z, 1 )
END IF
*
* Copy of D at the good place
*
KLC = 0
KLR = 0
DO 140 I = 1, K
GI = INDX( I )
D( GI ) = BUF( K+I )
COL = INDCOL( GI )
ROW = INDROW( GI )
IF( COL.EQ.MYCOL ) THEN
KLC = KLC + 1
INDXC( KLC ) = I
END IF
IF( ROW.EQ.MYROW ) THEN
KLR = KLR + 1
INDXR( KLR ) = I
END IF
140 CONTINUE
*
* Compute eigenvectors of the modified rank-1 modification.
*
IF( MYKL.NE.0 ) THEN
DO 180 J = 1, MYKL
KK = INDXC( J )
JU = INDX( KK )
JJU = INDXG2L( JU, NB, J, J, NPCOL )
CALL DLAED4( K, KK, DLAMDA, W, BUF, RHO, AUX, IINFO )
IF( IINFO.NE.0 ) THEN
INFO = KK
END IF
IF( K.EQ.1 .OR. K.EQ.2 ) THEN
DO 150 I = 1, KLR
KK = INDXR( I )
IU = INDX( KK )
IIU = INDXG2L( IU, NB, J, J, NPROW )
U( IIU, JJU ) = BUF( KK )
150 CONTINUE
GO TO 180
END IF
*
DO 160 I = 1, K
BUF( I ) = Z( I ) / BUF( I )
160 CONTINUE
TEMP = DNRM2( K, BUF, 1 )
DO 170 I = 1, KLR
KK = INDXR( I )
IU = INDX( KK )
IIU = INDXG2L( IU, NB, J, J, NPROW )
U( IIU, JJU ) = BUF( KK ) / TEMP
170 CONTINUE
*
180 CONTINUE
END IF
*
190 CONTINUE
*
RETURN
*
* End of PDLAED3
*
END
|