SRC\pslaed3.f

#lines: 356   size: 11 Kb   creation: 18/01/2006 23:36:04   last modification: 08/05/2008 18:38:04   attribute: ARCH    Find   Reload  
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
178:
179:
180:
181:
182:
183:
184:
185:
186:
187:
188:
189:
190:
191:
192:
193:
194:
195:
196:
197:
198:
199:
200:
201:
202:
203:
204:
205:
206:
207:
208:
209:
210:
211:
212:
213:
214:
215:
216:
217:
218:
219:
220:
221:
222:
223:
224:
225:
226:
227:
228:
229:
230:
231:
232:
233:
234:
235:
236:
237:
238:
239:
240:
241:
242:
243:
244:
245:
246:
247:
248:
249:
250:
251:
252:
253:
254:
255:
256:
257:
258:
259:
260:
261:
262:
263:
264:
265:
266:
267:
268:
269:
270:
271:
272:
273:
274:
275:
276:
277:
278:
279:
280:
281:
282:
283:
284:
285:
286:
287:
288:
289:
290:
291:
292:
293:
294:
295:
296:
297:
298:
299:
300:
301:
302:
303:
304:
305:
306:
307:
308:
309:
310:
311:
312:
313:
314:
315:
316:
317:
318:
319:
320:
321:
322:
323:
324:
325:
326:
327:
328:
329:
330:
331:
332:
333:
334:
335:
336:
337:
338:
339:
340:
341:
342:
343:
344:
345:
346:
347:
348:
349:
350:
351:
352:
353:
354:
355:
356:
      SUBROUTINE PSLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA,
     $                    W, Z, U, LDU, BUF, INDX, INDCOL, INDROW,
     $                    INDXR, INDXC, CTOT, NPCOL, INFO )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     December 31, 1998
*
*     .. Scalar Arguments ..
      INTEGER            DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL
      REAL               RHO
*     ..
*     .. Array Arguments ..
      INTEGER            CTOT( 0: NPCOL-1, 4 ), INDCOL( * ),
     $                   INDROW( * ), INDX( * ), INDXC( * ), INDXR( * )
      REAL               BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ),
     $                   W( * ), Z( * )
*     ..
*
*  Purpose
*  =======
*
*  PSLAED3 finds the roots of the secular equation, as defined by the
*  values in D, W, and RHO, between 1 and K.  It makes the
*  appropriate calls to SLAED4
*
*  This code makes very mild assumptions about floating point
*  arithmetic. It will work on machines with a guard digit in
*  add/subtract, or on those binary machines without guard digits
*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
*  It could conceivably fail on hexadecimal or decimal machines
*  without guard digits, but we know of none.
*
*  Arguments
*  =========
*
*  ICTXT  (global input) INTEGER
*         The BLACS context handle, indicating the global context of
*         the operation on the matrix. The context itself is global.
*
*  K      (output) INTEGER
*         The number of non-deflated eigenvalues, and the order of the
*         related secular equation. 0 <= K <=N.
*
*  N      (input) INTEGER
*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
*
*  NB      (global input) INTEGER
*          The blocking factor used to distribute the columns of the
*          matrix. NB >= 1.
*
*  D      (input/output) REAL array, dimension (N)
*         On entry, D contains the eigenvalues of the two submatrices to
*         be combined.
*         On exit, D contains the trailing (N-K) updated eigenvalues
*         (those which were deflated) sorted into increasing order.
*
*  DROW   (global input) INTEGER
*          The process row over which the first row of the matrix D is
*          distributed. 0 <= DROW < NPROW.
*
*  DCOL   (global input) INTEGER
*          The process column over which the first column of the
*          matrix D is distributed. 0 <= DCOL < NPCOL.
*
*  Q      (input/output) REAL array, dimension (LDQ, N)
*         On entry, Q contains the eigenvectors of two submatrices in
*         the two square blocks with corners at (1,1), (N1,N1)
*         and (N1+1, N1+1), (N,N).
*         On exit, Q contains the trailing (N-K) updated eigenvectors
*         (those which were deflated) in its last N-K columns.
*
*  LDQ    (input) INTEGER
*         The leading dimension of the array Q.  LDQ >= max(1,NQ).
*
*  RHO    (global input/output) REAL
*         On entry, the off-diagonal element associated with the rank-1
*         cut which originally split the two submatrices which are now
*         being recombined.
*         On exit, RHO has been modified to the value required by
*         PSLAED3.
*
*  DLAMDA (global output) REAL array, dimension (N)
*         A copy of the first K eigenvalues which will be used by
*         SLAED3 to form the secular equation.
*
*  W      (global output) REAL array, dimension (N)
*         The first k values of the final deflation-altered z-vector
*         which will be passed to SLAED3.
*
*  Z      (global input) REAL array, dimension (N)
*         On entry, Z contains the updating vector (the last
*         row of the first sub-eigenvector matrix and the first row of
*         the second sub-eigenvector matrix).
*         On exit, the contents of Z have been destroyed by the updating
*         process.
*
*  U     (global output) REAL array
*         global dimension (N, N), local dimension (LDU, NQ).
*         Q  contains the orthonormal eigenvectors of the symmetric
*         tridiagonal matrix.
*
*  LDU    (input) INTEGER
*         The leading dimension of the array U.
*
*  QBUF   (workspace) REAL array, dimension 3*N
*
*
*  INDX   (workspace) INTEGER array, dimension (N)
*         The permutation used to sort the contents of DLAMDA into
*         ascending order.
*
*  INDCOL (workspace) INTEGER array, dimension (N)
*
*
*  INDROW (workspace) INTEGER array, dimension (N)
*
*
*  INDXR (workspace) INTEGER array, dimension (N)
*
*
*  INDXC (workspace) INTEGER array, dimension (N)
*
*  CTOT   (workspace) INTEGER array, dimension( NPCOL, 4)
*
*  NPCOL   (global input) INTEGER
*          The total number of columns over which the distributed
*           submatrix is distributed.
*
*  INFO   (output) INTEGER
*          = 0:  successful exit.
*          < 0:  if INFO = -i, the i-th argument had an illegal value.
*          > 0:  The algorithm failed to compute the ith eigenvalue.
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               ONE
      PARAMETER          ( ONE = 1.0E0 )
*     ..
*     .. Local Scalars ..
      INTEGER            COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU,
     $                   KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW,
     $                   NPROW, PDC, PDR, ROW
      REAL               AUX, TEMP
*     ..
*     .. External Functions ..
      INTEGER            INDXG2L
      REAL               SLAMC3, SNRM2
      EXTERNAL           INDXG2L, SLAMC3, SNRM2
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, SCOPY, SGEBR2D, SGEBS2D,
     $                   SGERV2D, SGESD2D, SLAED4
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MOD, SIGN, SQRT
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      IINFO = 0
*
*     Quick return if possible
*
      IF( K.EQ.0 )
     $   RETURN
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
      ROW = DROW
      COL = DCOL
      DO 20 I = 1, N, NB
         DO 10 J = 0, NB - 1
            INDROW( I+J ) = ROW
            INDCOL( I+J ) = COL
   10    CONTINUE
         ROW = MOD( ROW+1, NPROW )
         COL = MOD( COL+1, NPCOL )
   20 CONTINUE
*
      MYKL = CTOT( MYCOL, 1 ) + CTOT( MYCOL, 2 ) + CTOT( MYCOL, 3 )
      KLR = MYKL / NPROW
      IF( MYROW.EQ.DROW ) THEN
         MYKLR = KLR + MOD( MYKL, NPROW )
      ELSE
         MYKLR = KLR
      END IF
      PDC = 1
      COL = DCOL
   30 CONTINUE
      IF( MYCOL.NE.COL ) THEN
         PDC = PDC + CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 )
         COL = MOD( COL+1, NPCOL )
         GO TO 30
      END IF
      PDR = PDC
      KL = KLR + MOD( MYKL, NPROW )
      ROW = DROW
   40 CONTINUE
      IF( MYROW.NE.ROW ) THEN
         PDR = PDR + KL
         KL = KLR
         ROW = MOD( ROW+1, NPROW )
         GO TO 40
      END IF
*
      DO 50 I = 1, K
         DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
         Z( I ) = ONE
   50 CONTINUE
      IF( MYKLR.GT.0 ) THEN
         KK = PDR
         DO 80 I = 1, MYKLR
            CALL SLAED4( K, KK, DLAMDA, W, BUF, RHO, BUF( K+I ), IINFO )
            IF( IINFO.NE.0 ) THEN
               INFO = KK
            END IF
*
*     ..Compute part of z
*
            DO 60 J = 1, KK - 1
               Z( J ) = Z( J )*( BUF( J ) /
     $                  ( DLAMDA( J )-DLAMDA( KK ) ) )
   60       CONTINUE
            Z( KK ) = Z( KK )*BUF( KK )
            DO 70 J = KK + 1, K
               Z( J ) = Z( J )*( BUF( J ) /
     $                  ( DLAMDA( J )-DLAMDA( KK ) ) )
   70       CONTINUE
            KK = KK + 1
   80    CONTINUE
*
         IF( MYROW.NE.DROW ) THEN
            CALL SCOPY( K, Z, 1, BUF, 1 )
            CALL SGESD2D( ICTXT, K+MYKLR, 1, BUF, K+MYKLR, DROW, MYCOL )
         ELSE
            IPD = 2*K + 1
            CALL SCOPY( MYKLR, BUF( K+1 ), 1, BUF( IPD ), 1 )
            IF( KLR.GT.0 ) THEN
               IPD = MYKLR + IPD
               ROW = MOD( DROW+1, NPROW )
               DO 100 I = 1, NPROW - 1
                  CALL SGERV2D( ICTXT, K+KLR, 1, BUF, K+KLR, ROW,
     $                          MYCOL )
                  CALL SCOPY( KLR, BUF( K+1 ), 1, BUF( IPD ), 1 )
                  DO 90 J = 1, K
                     Z( J ) = Z( J )*BUF( J )
   90             CONTINUE
                  IPD = IPD + KLR
                  ROW = MOD( ROW+1, NPROW )
  100          CONTINUE
            END IF
         END IF
      END IF
*
      IF( MYROW.EQ.DROW ) THEN
         IF( MYCOL.NE.DCOL .AND. MYKL.NE.0 ) THEN
            CALL SCOPY( K, Z, 1, BUF, 1 )
            CALL SCOPY( MYKL, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 )
            CALL SGESD2D( ICTXT, K+MYKL, 1, BUF, K+MYKL, MYROW, DCOL )
         ELSE IF( MYCOL.EQ.DCOL ) THEN
            IPD = 2*K + 1
            COL = DCOL
            KL = MYKL
            DO 120 I = 1, NPCOL - 1
               IPD = IPD + KL
               COL = MOD( COL+1, NPCOL )
               KL = CTOT( COL, 1 ) + CTOT( COL, 2 ) + CTOT( COL, 3 )
               IF( KL.NE.0 ) THEN
                  CALL SGERV2D( ICTXT, K+KL, 1, BUF, K+KL, MYROW, COL )
                  CALL SCOPY( KL, BUF( K+1 ), 1, BUF( IPD ), 1 )
                  DO 110 J = 1, K
                     Z( J ) = Z( J )*BUF( J )
  110             CONTINUE
               END IF
  120       CONTINUE
            DO 130 I = 1, K
               Z( I ) = SIGN( SQRT( -Z( I ) ), W( I ) )
  130       CONTINUE
*
         END IF
      END IF
*
*     Diffusion
*
      IF( MYROW.EQ.DROW .AND. MYCOL.EQ.DCOL ) THEN
         CALL SCOPY( K, Z, 1, BUF, 1 )
         CALL SCOPY( K, BUF( 2*K+1 ), 1, BUF( K+1 ), 1 )
         CALL SGEBS2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K )
      ELSE
         CALL SGEBR2D( ICTXT, 'All', ' ', 2*K, 1, BUF, 2*K, DROW, DCOL )
         CALL SCOPY( K, BUF, 1, Z, 1 )
      END IF
*
*     Copy of D at the good place
*
      KLC = 0
      KLR = 0
      DO 140 I = 1, K
         GI = INDX( I )
         D( GI ) = BUF( K+I )
         COL = INDCOL( GI )
         ROW = INDROW( GI )
         IF( COL.EQ.MYCOL ) THEN
            KLC = KLC + 1
            INDXC( KLC ) = I
         END IF
         IF( ROW.EQ.MYROW ) THEN
            KLR = KLR + 1
            INDXR( KLR ) = I
         END IF
  140 CONTINUE
*
*     Compute eigenvectors of the modified rank-1 modification.
*
      IF( MYKL.NE.0 ) THEN
         DO 180 J = 1, MYKL
            KK = INDXC( J )
            JU = INDX( KK )
            JJU = INDXG2L( JU, NB, J, J, NPCOL )
            CALL SLAED4( K, KK, DLAMDA, W, BUF, RHO, AUX, IINFO )
            IF( IINFO.NE.0 ) THEN
               INFO = KK
            END IF
            IF( K.EQ.1 .OR. K.EQ.2 ) THEN
               DO 150 I = 1, KLR
                  KK = INDXR( I )
                  IU = INDX( KK )
                  IIU = INDXG2L( IU, NB, J, J, NPROW )
                  U( IIU, JJU ) = BUF( KK )
  150          CONTINUE
               GO TO 180
            END IF
*
            DO 160 I = 1, K
               BUF( I ) = Z( I ) / BUF( I )
  160       CONTINUE
            TEMP = SNRM2( K, BUF, 1 )
            DO 170 I = 1, KLR
               KK = INDXR( I )
               IU = INDX( KK )
               IIU = INDXG2L( IU, NB, J, J, NPROW )
               U( IIU, JJU ) = BUF( KK ) / TEMP
  170       CONTINUE
  180    CONTINUE
      END IF
*
  190 CONTINUE
      RETURN
*
*     End of PSLAED3
*
      END