Routine: PDLAED2()  File: SRC\pdlaed2.f

 
 
# lines: 455
  # code: 455
  # comment: 0
  # blank:0
# Variables:72
# Callers:1
# Callings:2
# Words:205
# Keywords:135
 

 

..
     .. Array Arguments ..
     ..
  Purpose
  =======
  PDLAED2 sorts the two sets of eigenvalues together into a single
  sorted set.  Then it tries to deflate the size of the problem.
  There are two ways in which deflation can occur:  when two or more
  eigenvalues are close together or if there is a tiny entry in the
  Z vector.  For each such occurrence the order of the related secular
  equation problem is reduced by one.
  Arguments
  =========
  ICTXT  (global input) INTEGER
         The BLACS context handle, indicating the global context of
         the operation on the matrix. The context itself is global.
  K      (output) INTEGER
         The number of non-deflated eigenvalues, and the order of the
         related secular equation. 0 <= K <=N.
  N      (input) INTEGER
         The dimension of the symmetric tridiagonal matrix.  N >= 0.
  N1     (input) INTEGER
         The location of the last eigenvalue in the leading sub-matrix.
         min(1,N) < N1 < N.
  NB      (global input) INTEGER
          The blocking factor used to distribute the columns of the
          matrix. NB >= 1.
  D      (input/output) DOUBLE PRECISION array, dimension (N)
         On entry, D contains the eigenvalues of the two submatrices to
         be combined.
         On exit, D contains the trailing (N-K) updated eigenvalues
         (those which were deflated) sorted into increasing order.
  DROW   (global input) INTEGER
          The process row over which the first row of the matrix D is
          distributed. 0 <= DROW < NPROW.
  DCOL   (global input) INTEGER
          The process column over which the first column of the
          matrix D is distributed. 0 <= DCOL < NPCOL.
  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
         On entry, Q contains the eigenvectors of two submatrices in
         the two square blocks with corners at (1,1), (N1,N1)
         and (N1+1, N1+1), (N,N).
         On exit, Q contains the trailing (N-K) updated eigenvectors
         (those which were deflated) in its last N-K columns.
  LDQ    (input) INTEGER
         The leading dimension of the array Q.  LDQ >= max(1,NQ).
  RHO    (global input/output) DOUBLE PRECISION
         On entry, the off-diagonal element associated with the rank-1
         cut which originally split the two submatrices which are now
         being recombined.
         On exit, RHO has been modified to the value required by
         PDLAED3.
  Z      (global input) DOUBLE PRECISION array, dimension (N)
         On entry, Z contains the updating vector (the last
         row of the first sub-eigenvector matrix and the first row of
         the second sub-eigenvector matrix).
         On exit, the contents of Z have been destroyed by the updating
         process.
  DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
         A copy of the first K eigenvalues which will be used by
         SLAED3 to form the secular equation.
  W      (global output) DOUBLE PRECISION array, dimension (N)
         The first k values of the final deflation-altered z-vector
         which will be passed to SLAED3.
  Q2     (output) DOUBLE PRECISION array, dimension (LDQ2, NQ)
         A copy of the first K eigenvectors which will be used by
  LDQ2    (input) INTEGER
         The leading dimension of the array Q2.
  QBUF   (workspace) DOUBLE PRECISION array, dimension 3*N
  CTOT   (workspace) INTEGER array, dimension( NPCOL, 4)
  PSM    (workspace) INTEGER array, dimension( NPCOL, 4)
  NPCOL   (global input) INTEGER
          The total number of columns over which the distributed
           submatrix is distributed.
  INDX   (workspace) INTEGER array, dimension (N)
         The permutation used to sort the contents of DLAMDA into
         ascending order.
  INDXC  (output) INTEGER array, dimension (N)
         The permutation used to arrange the columns of the deflated
         Q matrix into three groups:  the first group contains non-zero
         elements only at and above N1, the second contains
         non-zero elements only below N1, and the third is dense.
  INDXP  (workspace) INTEGER array, dimension (N)
         The permutation used to place deflated values of D at the end
         of the array.  INDXP(1:K) points to the nondeflated D-values
         and INDXP(K+1:N) points to the deflated eigenvalues.
  INDCOL (workspace) INTEGER array, dimension (N)
  COLTYP (workspace/output) INTEGER array, dimension (N)
         During execution, a label which will indicate which of the
         following types a column in the Q2 matrix is:
         1 : non-zero in the upper half only;
         2 : dense;
         3 : non-zero in the lower half only;
         4 : deflated.
  NN     (global output) INTEGER, the order of matrix U, (PDLAED1).
  NN1    (global output) INTEGER, the order of matrix Q1, (PDLAED1).
  NN2    (global output) INTEGER, the order of matrix Q2, (PDLAED1).
  IB1    (global output) INTEGER, pointeur on Q1, (PDLAED1).
  IB2    (global output) INTEGER, pointeur on Q2, (PDLAED1).
  =====================================================================
     .. Parameters ..

 
Display dynamic version Find AutoScroll Reload FontSize: - + Hide Comments Hide Blanks Frame FullScreen MailPrint

 
001        SUBROUTINE PDLAED2( ICTXT , K , N , N1 , NB , D , DROW , DCOL , Q , LDQ ,
002       $RHO , Z , W , DLAMDA , Q2 , LDQ2 , QBUF , CTOT , PSM ,
003       $NPCOL , INDX , INDXC , INDXP , INDCOL , COLTYP , NN ,
004       $NN1 , NN2 , IB1 , IB2 )
005  
006  *     -- ScaLAPACK auxiliary routine(version 1.7) --
007  *     University of Tennessee , Knoxville , Oak Ridge National Laboratory ,
008  *     and University of California , Berkeley.
009  *     December 31 , 1998
010  
011  *     .. Scalar Arguments ..
012        INTEGER DCOL , DROW , IB1 , IB2 , ICTXT , K , LDQ , LDQ2 , N ,
013       $N1 , NB , NN , NN1 , NN2 , NPCOL
014        DOUBLE PRECISION RHO
015        DOUBLE PRECISION MONE , ZERO , ONE , TWO , EIGHT
016        PARAMETER( MONE = - 1.0D0 , ZERO = 0.0D0 , ONE = 1.0D0 ,
017       $TWO = 2.0D0 , EIGHT = 8.0D0 )
018  *     ..
019  *     .. Local Scalars ..
020        INTEGER COL , CT , I , IAM , IE1 , IE2 , IMAX , INFO , J , JJQ2 ,
021       $JJS , JMAX , JS , K2 , MYCOL , MYROW , N1P1 , N2 , NJ ,
022       $NJCOL , NJJ , NP , NPROCS , NPROW , PJ , PJCOL , PJJ
023        DOUBLE PRECISION C , EPS , S , T , TAU , TOL
024  *     ..
025  *     .. External Functions ..
026        INTEGER IDAMAX , INDXG2L , INDXL2G , NUMROC
027        DOUBLE PRECISION DLAPY2 , PDLAMCH
028        EXTERNAL IDAMAX , INDXG2L , INDXL2G , NUMROC , PDLAMCH ,
029       $DLAPY2
030  *     ..
031  *     .. External Subroutines ..
032        EXTERNAL BLACS_GRIDINFO , BLACS_PINFO , DCOPY , DGERV2D ,
033       $DGESD2D , DLAPST , DROT , DSCAL , INFOG1L
034  *     ..
035  *     .. Intrinsic Functions ..
036        INTRINSIC ABS , MAX , MIN , MOD , SQRT
037  *     ..
038  *     .. External Functions ..
039  *     ..
040  *     .. Local Arrays ..
041        INTEGER PTT( 4 )
042  *     ..
043  *     .. Executable Statements ..
044  
045  *     Quick return if possible
046  
047        IF( N.EQ.0 )
048       $    RETURN
049  
050            CALL BLACS_PINFO( IAM , NPROCS )
051            CALL BLACS_GRIDINFO( ICTXT , NPROW , NPCOL , MYROW , MYCOL )
052            NP = NUMROC( N , NB , MYROW , DROW , NPROW )
053  
054            N2 = N - N1
055            N1P1 = N1 + 1
056  
057            IF( RHO.LT.ZERO ) THEN
058                CALL DSCAL( N2 , MONE , Z( N1P1 ) , 1 )
059            END IF
060  
061  *         Normalize z so that norm(z) = 1. Since z is the concatenation of
062  *         two normalized vectors , norm2(z) = sqrt(2).
063  
064            T = ONE / SQRT( TWO )
065            CALL DSCAL( N , T , Z , 1 )
066  
067  *         RHO = ABS( norm(z)**2 * RHO )
068  
069            RHO = ABS( TWO*RHO )
070  
071  *         Calculate the allowable deflation tolerance
072  
073            IMAX = IDAMAX( N , Z , 1 )
074            JMAX = IDAMAX( N , D , 1 )
075            EPS = PDLAMCH( ICTXT , 'Epsilon' )
076            TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ) , ABS( Z( IMAX ) ) )
077  
078  *         If the rank - 1 modifier is small enough , no more needs to be done
079  *         except to reorganize Q so that its columns correspond with the
080  *         elements in D.
081  
082            IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
083                K = 0
084                GO TO 220
085            END IF
086  
087  *         If there are multiple eigenvalues then the problem deflates. Here
088  *         the number of equal eigenvalues are found. As each equal
089  *         eigenvalue is found , an elementary reflector is computed to rotate
090  *         the corresponding eigensubspace so that the corresponding
091  *         components of Z are zero in this new basis.
092  
093            CALL DLAPST ( 'I' , N , D , INDX , INFO )
094  
095            DO 10 I = 1 , N1
096                COLTYP( I ) = 1
097     10     CONTINUE
098            DO 20 I = N1P1 , N
099                COLTYP( I ) = 3
100     20     CONTINUE
101            COL = DCOL
102            DO 40 I = 1 , N , NB
103                DO 30 J = 0 , NB - 1
104                    IF( I + J.LE.N )
105       $                INDCOL( I + J ) = COL
106     30         CONTINUE
107                COL = MOD( COL + 1 , NPCOL )
108     40     CONTINUE
109  
110            K = 0
111            K2 = N + 1
112            DO 50 J = 1 , N
113                NJ = INDX( J )
114                IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
115  
116  *                 Deflate due to small z component.
117  
118                    K2 = K2 - 1
119                    COLTYP( NJ ) = 4
120                    INDXP( K2 ) = NJ
121                    IF( J.EQ.N )
122       $                GO TO 80
123                    ELSE
124                        PJ = NJ
125                        GO TO 60
126                    END IF
127     50     CONTINUE
128     60 CONTINUE
129        J = J + 1
130        NJ = INDX( J )
131        IF( J.GT.N )
132       $    GO TO 80
133            IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
134  
135  *             Deflate due to small z component.
136  
137                K2 = K2 - 1
138                COLTYP( NJ ) = 4
139                INDXP( K2 ) = NJ
140            ELSE
141  
142  *             Check if eigenvalues are close enough to allow deflation.
143  
144                S = Z( PJ )
145                C = Z( NJ )
146  
147  *             Find sqrt(a**2 + b**2) without overflow or
148  *             destructive underflow.
149  
150                TAU = DLAPY2( C , S )
151                T = D( NJ ) - D( PJ )
152                C = C / TAU
153                S = - S / TAU
154                IF( ABS( T*C*S ).LE.TOL ) THEN
155  
156  *                 Deflation is possible.
157  
158                    Z( NJ ) = TAU
159                    Z( PJ ) = ZERO
160                    IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
161       $                COLTYP( NJ ) = 2
162                        COLTYP( PJ ) = 4
163                        CALL INFOG1L( NJ , NB , NPCOL , MYCOL , DCOL , NJJ , NJCOL )
164                        CALL INFOG1L( PJ , NB , NPCOL , MYCOL , DCOL , PJJ , PJCOL )
165                        IF( INDCOL( PJ ).EQ.INDCOL( NJ ) .AND. MYCOL.EQ.NJCOL ) THEN
166                            CALL DROT( NP , Q( 1 , PJJ ) , 1 , Q( 1 , NJJ ) , 1 , C , S )
167                        ELSE IF( MYCOL.EQ.PJCOL ) THEN
168                            CALL DGESD2D( ICTXT , NP , 1 , Q( 1 , PJJ ) , NP , MYROW ,
169       $                    NJCOL )
170                            CALL DGERV2D( ICTXT , NP , 1 , QBUF , NP , MYROW , NJCOL )
171                            CALL DROT( NP , Q( 1 , PJJ ) , 1 , QBUF , 1 , C , S )
172                        ELSE IF( MYCOL.EQ.NJCOL ) THEN
173                            CALL DGESD2D( ICTXT , NP , 1 , Q( 1 , NJJ ) , NP , MYROW ,
174       $                    PJCOL )
175                            CALL DGERV2D( ICTXT , NP , 1 , QBUF , NP , MYROW , PJCOL )
176                            CALL DROT( NP , QBUF , 1 , Q( 1 , NJJ ) , 1 , C , S )
177                        END IF
178                        T = D( PJ )*C**2 + D( NJ )*S**2
179                        D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
180                        D( PJ ) = T
181                        K2 = K2 - 1
182                        I = 1
183     70 CONTINUE
184        IF( K2 + I.LE.N ) THEN
185            IF( D( PJ ).LT.D( INDXP( K2 + I ) ) ) THEN
186                INDXP( K2 + I - 1 ) = INDXP( K2 + I )
187                INDXP( K2 + I ) = PJ
188                I = I + 1
189                GO TO 70
190            ELSE
191                INDXP( K2 + I - 1 ) = PJ
192            END IF
193        ELSE
194            INDXP( K2 + I - 1 ) = PJ
195        END IF
196        PJ = NJ
197        ELSE
198            K = K + 1
199            DLAMDA( K ) = D( PJ )
200            W( K ) = Z( PJ )
201            INDXP( K ) = PJ
202            PJ = NJ
203        END IF
204        END IF
205        GO TO 60
206     80 CONTINUE
207  
208  *     Record the last eigenvalue.
209  
210        K = K + 1
211        DLAMDA( K ) = D( PJ )
212        W( K ) = Z( PJ )
213        INDXP( K ) = PJ
214  
215  *     Count up the total number of the various types of columns , then
216  *     form a permutation which positions the four column types into
217  *     four uniform groups(although one or more of these groups may be
218  *     empty).
219  
220        DO 100 J = 1 , 4
221            DO 90 I = 0 , NPCOL - 1
222                CTOT( I , J ) = 0
223     90     CONTINUE
224            PTT( J ) = 0
225    100 CONTINUE
226        DO 110 J = 1 , N
227            CT = COLTYP( J )
228            COL = INDCOL( J )
229            CTOT( COL , CT ) = CTOT( COL , CT ) + 1
230    110 CONTINUE
231  
232  *     PSM(*) = Position in SubMatrix(of types 1 through 4)
233  
234        DO 120 COL = 0 , NPCOL - 1
235            PSM( COL , 1 ) = 1
236            PSM( COL , 2 ) = 1 + CTOT( COL , 1 )
237            PSM( COL , 3 ) = PSM( COL , 2 ) + CTOT( COL , 2 )
238            PSM( COL , 4 ) = PSM( COL , 3 ) + CTOT( COL , 3 )
239    120 CONTINUE
240        PTT( 1 ) = 1
241        DO 140 I = 2 , 4
242            CT = 0
243            DO 130 J = 0 , NPCOL - 1
244                CT = CT + CTOT( J , I - 1 )
245    130     CONTINUE
246            PTT( I ) = PTT( I - 1 ) + CT
247    140 CONTINUE
248  
249  *     Fill out the INDXC array so that the permutation which it induces
250  *     will place all type - 1 columns first , all type - 2 columns next ,
251  *     then all type - 3's , and finally all type - 4's.
252  
253        DO 150 J = 1 , N
254            JS = INDXP( J )
255            COL = INDCOL( JS )
256            CT = COLTYP( JS )
257            I = INDXL2G( PSM( COL , CT ) , NB , COL , DCOL , NPCOL )
258            INDX( J ) = I
259            INDXC( PTT( CT ) ) = I
260            PSM( COL , CT ) = PSM( COL , CT ) + 1
261            PTT( CT ) = PTT( CT ) + 1
262    150 CONTINUE
263  
264        DO 160 J = 1 , N
265            JS = INDXP( J )
266            JJS = INDXG2L( JS , NB , J , J , NPCOL )
267            COL = INDCOL( JS )
268            IF( COL.EQ.MYCOL ) THEN
269                I = INDX( J )
270                JJQ2 = INDXG2L( I , NB , J , J , NPCOL )
271                CALL DCOPY( NP , Q( 1 , JJS ) , 1 , Q2( 1 , JJQ2 ) , 1 )
272            END IF
273    160 CONTINUE
274  
275  *     The deflated eigenvalues and their corresponding vectors go back
276  *     into the last N - K slots of D and Q respectively.
277  
278        CALL DCOPY( N , D , 1 , Z , 1 )
279        DO 170 J = K + 1 , N
280            JS = INDXP( J )
281            I = INDX( J )
282            D( I ) = Z( JS )
283    170 CONTINUE
284  
285        PTT( 1 ) = 1
286        DO 190 I = 2 , 4
287            CT = 0
288            DO 180 J = 0 , NPCOL - 1
289                CT = CT + CTOT( J , I - 1 )
290    180     CONTINUE
291            PTT( I ) = PTT( I - 1 ) + CT
292    190 CONTINUE
293  
294        IB1 = INDXC( 1 )
295        IE1 = IB1
296        IB2 = INDXC( PTT( 2 ) )
297        IE2 = IB2
298        DO 200 I = 2 , PTT( 3 ) - 1
299            IB1 = MIN( IB1 , INDXC( I ) )
300            IE1 = MAX( IE1 , INDXC( I ) )
301    200 CONTINUE
302        DO 210 I = PTT( 2 ) , PTT( 4 ) - 1
303            IB2 = MIN( IB2 , INDXC( I ) )
304            IE2 = MAX( IE2 , INDXC( I ) )
305    210 CONTINUE
306        NN1 = IE1 - IB1 + 1
307        NN2 = IE2 - IB2 + 1
308        NN = MAX( IE1 , IE2 ) - MIN( IB1 , IB2 ) + 1
309    220 CONTINUE
310        RETURN
311  
312  *     End of PDLAED2
313  
314        END