Routine: PDLASMSUB()  File: SRC\pdlasmsub.f

 
 
# lines: 368
  # code: 368
  # comment: 0
  # blank:0
# Variables:57
# Callers:0
# Callings:1
# Words:220
# Keywords:134
 

 

..
     .. Array Arguments ..
     ..
  Purpose
  =======
  PDLASMSUB looks for a small subdiagonal element from the bottom
     of the matrix that it can safely set to zero.
  Notes
  =====
  Each global data object is described by an associated description
  vector.  This vector stores the information required to establish
  the mapping between an object element and its corresponding process
  and memory location.
  Let A be a generic term for any 2D block cyclicly distributed array.
  Such a global array has an associated description vector DESCA.
  In the following comments, the character _ should be read as
  "of the global array".
  NOTATION        STORED IN      EXPLANATION
  --------------- -------------- --------------------------------------
  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
                                 DTYPE_A = 1.
  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
                                 the BLACS process grid A is distribu-
                                 ted over. The context itself is glo-
                                 bal, but the handle (the integer
                                 value) may vary.
  M_A    (global) DESCA( M_ )    The number of rows in the global
                                 array A.
  N_A    (global) DESCA( N_ )    The number of columns in the global
                                 array A.
  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
                                 the rows of the array.
  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
                                 the columns of the array.
  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
                                 row of the array A is distributed.
  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
                                 first column of the array A is
                                 distributed.
  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
  Let K be the number of rows or columns of a distributed matrix,
  and assume that its process grid has dimension p x q.
  LOCr( K ) denotes the number of elements of K that a process
  would receive if K were distributed over the p processes of its
  process column.
  Similarly, LOCc( K ) denotes the number of elements of K that a
  process would receive if K were distributed over the q processes of
  its process row.
  The values of LOCr() and LOCc() may be determined via a call to the
  ScaLAPACK tool function, NUMROC:
          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
  An upper bound for these quantities may be computed by:
          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
  Arguments
  =========
  A       (global input) DOUBLE PRECISION array, dimension
          (DESCA(LLD_),*)
          On entry, the Hessenberg matrix whose tridiagonal part is
          being scanned.
          Unchanged on exit.
  DESCA   (global and local input) INTEGER array of dimension DLEN_.
          The array descriptor for the distributed matrix A.
  I       (global input) INTEGER
          The global location of the bottom of the unreduced
          submatrix of A.
          Unchanged on exit.
  L       (global input) INTEGER
          The global location of the top of the unreduced submatrix
          of A.
          Unchanged on exit.
  K       (global output) INTEGER
          On exit, this yields the bottom portion of the unreduced
          submatrix.  This will satisfy: L <= M  <= I-1.
  SMLNUM  (global input) DOUBLE PRECISION
          On entry, a "small number" for the given matrix.
          Unchanged on exit.
  BUF     (local output) DOUBLE PRECISION array of size LWORK.
  LWORK   (global input) INTEGER
          On exit, LWORK is the size of the work buffer.
          This must be at least 2*Ceil( Ceil( (I-L)/HBL ) /
                                        LCM(NPROW,NPCOL) )
          Here LCM is least common multiple, and NPROWxNPCOL is the
          logical grid size.
   Notes:
     This routine does a global maximum and must be called by all
     processes.
     This code is basically a parallelization of the following snip
     of LAPACK code from DLAHQR:
        Look for a single small subdiagonal element.
        DO 20 K = I, L + 1, -1
           TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) )
           IF( TST1.EQ.ZERO )
    $         TST1 = DLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
           IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
    $         GO TO 30
  20    CONTINUE
  30    CONTINUE
  Implemented by:  G. Henry, November 17, 1996
  =====================================================================
     .. Parameters ..

 
Display dynamic version Find AutoScroll Reload FontSize: - + Hide Comments Hide Blanks Frame FullScreen MailPrint

 
001        SUBROUTINE PDLASMSUB( A , DESCA , I , L , K , SMLNUM , BUF , LWORK )
002  
003  *     -- ScaLAPACK routine(version 1.7) --
004  *     University of Tennessee , Knoxville , Oak Ridge National Laboratory ,
005  *     and University of California , Berkeley.
006  *     May 1 , 1997
007  
008  *     .. Scalar Arguments ..
009        INTEGER I , K , L , LWORK
010        DOUBLE PRECISION SMLNUM
011        INTEGER BLOCK_CYCLIC_2D , CSRC_ , CTXT_ , DLEN_ , DTYPE_ ,
012       $LLD_ , MB_ , M_ , NB_ , N_ , RSRC_
013        PARAMETER( BLOCK_CYCLIC_2D = 1 , DLEN_ = 9 , DTYPE_ = 1 ,
014       $CTXT_ = 2 , M_ = 3 , N_ = 4 , MB_ = 5 , NB_ = 6 ,
015       $RSRC_ = 7 , CSRC_ = 8 , LLD_ = 9 )
016        DOUBLE PRECISION ZERO
017        PARAMETER( ZERO = 0.0D + 0 )
018  *     ..
019  *     .. Local Scalars ..
020        INTEGER CONTXT , DOWN , HBL , IBUF1 , IBUF2 , ICOL1 , ICOL2 ,
021       $II , III , IRCV1 , IRCV2 , IROW1 , IROW2 , ISRC ,
022       $ISTR1 , ISTR2 , ITMP1 , ITMP2 , JJ , JJJ , JSRC , LDA ,
023       $LEFT , MODKM1 , MYCOL , MYROW , NPCOL , NPROW , NUM ,
024       $RIGHT , UP
025        DOUBLE PRECISION H10 , H11 , H22 , TST1 , ULP
026  *     ..
027  *     .. External Functions ..
028        INTEGER ILCM , NUMROC
029        DOUBLE PRECISION PDLAMCH
030        EXTERNAL ILCM , NUMROC , PDLAMCH
031  *     ..
032  *     .. External Subroutines ..
033        EXTERNAL BLACS_GRIDINFO , DGERV2D , DGESD2D , IGAMX2D ,
034       $INFOG1L , INFOG2L
035  *     ..
036  *     .. Intrinsic Functions ..
037        INTRINSIC ABS , MAX , MOD
038  *     ..
039  *     .. Executable Statements ..
040  
041        HBL = DESCA( MB_ )
042        CONTXT = DESCA( CTXT_ )
043        LDA = DESCA( LLD_ )
044        ULP = PDLAMCH( CONTXT , 'PRECISION' )
045        CALL BLACS_GRIDINFO( CONTXT , NPROW , NPCOL , MYROW , MYCOL )
046        LEFT = MOD( MYCOL + NPCOL - 1 , NPCOL )
047        RIGHT = MOD( MYCOL + 1 , NPCOL )
048        UP = MOD( MYROW + NPROW - 1 , NPROW )
049        DOWN = MOD( MYROW + 1 , NPROW )
050        NUM = NPROW*NPCOL
051  
052  *     BUFFER1 STARTS AT BUF(ISTR1 + 1) AND WILL CONTAINS IBUF1 ELEMENTS
053  *     BUFFER2 STARTS AT BUF(ISTR2 + 1) AND WILL CONTAINS IBUF2 ELEMENTS
054  
055        ISTR1 = 0
056        ISTR2 =(( I - L ) / HBL )
057        IF( ISTR2*HBL.LT.( I - L ) )
058       $    ISTR2 = ISTR2 + 1
059            II = ISTR2 / ILCM( NPROW , NPCOL )
060            IF( II*ILCM( NPROW , NPCOL ).LT.ISTR2 ) THEN
061                ISTR2 = II + 1
062            ELSE
063                ISTR2 = II
064            END IF
065            IF( LWORK.LT.2*ISTR2 ) THEN
066  
067  *             Error !
068  
069                RETURN
070            END IF
071            CALL INFOG2L( I , I , DESCA , NPROW , NPCOL , MYROW , MYCOL , IROW1 ,
072       $    ICOL1 , II , JJ )
073            MODKM1 = MOD( I - 1 + HBL , HBL )
074  
075  *         COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO
076  *         2 BUFFERS TO SEND TO WHOMEVER OWNS H(K , K) AS K MOVES DIAGONALLY
077  *         UP THE TRIDIAGONAL
078  
079            IBUF1 = 0
080            IBUF2 = 0
081            IRCV1 = 0
082            IRCV2 = 0
083            DO 10 K = I , L + 1 , - 1
084                IF(( MODKM1.EQ.0 ) .AND.( DOWN.EQ.II ) .AND.
085       $( RIGHT.EQ.JJ ) ) THEN
086  
087  *             WE MUST PACK H(K - 1 , K - 1) AND SEND IT DIAGONAL DOWN
088  
089                IF(( DOWN.NE.MYROW ) .OR.( RIGHT.NE.MYCOL ) ) THEN
090                    CALL INFOG2L( K - 1 , K - 1 , DESCA , NPROW , NPCOL , MYROW ,
091       $            MYCOL , IROW1 , ICOL1 , ISRC , JSRC )
092                    IBUF1 = IBUF1 + 1
093                    BUF( ISTR1 + IBUF1 ) = A(( ICOL1 - 1 )*LDA + IROW1 )
094                END IF
095            END IF
096            IF(( MODKM1.EQ.0 ) .AND.( MYROW.EQ.II ) .AND.
097       $( RIGHT.EQ.JJ ) ) THEN
098  
099  *         WE MUST PACK H(K , K - 1) AND SEND IT RIGHT
100  
101            IF( NPCOL.GT.1 ) THEN
102                CALL INFOG2L( K , K - 1 , DESCA , NPROW , NPCOL , MYROW , MYCOL ,
103       $        IROW1 , ICOL1 , ISRC , JSRC )
104                IBUF2 = IBUF2 + 1
105                BUF( ISTR2 + IBUF2 ) = A(( ICOL1 - 1 )*LDA + IROW1 )
106            END IF
107        END IF
108  
109  *     ADD UP THE RECEIVES
110  
111        IF(( MYROW.EQ.II ) .AND.( MYCOL.EQ.JJ ) ) THEN
112            IF(( MODKM1.EQ.0 ) .AND.(( NPROW.GT.1 ) .OR.( NPCOL.GT.
113       $    1 ) ) ) THEN
114  
115  *         WE MUST RECEIVE H(K - 1 , K - 1) FROM DIAGONAL UP
116  
117            IRCV1 = IRCV1 + 1
118        END IF
119        IF(( MODKM1.EQ.0 ) .AND.( NPCOL.GT.1 ) ) THEN
120  
121  *         WE MUST RECEIVE H(K , K - 1) FROM LEFT
122  
123            IRCV2 = IRCV2 + 1
124        END IF
125        END IF
126  
127  *     POSSIBLY CHANGE OWNERS(OCCURS ONLY WHEN MOD(K - 1 , HBL) = 0)
128  
129        IF( MODKM1.EQ.0 ) THEN
130            II = II - 1
131            JJ = JJ - 1
132            IF( II.LT.0 )
133       $        II = NPROW - 1
134                IF( JJ.LT.0 )
135       $            JJ = NPCOL - 1
136                END IF
137                MODKM1 = MODKM1 - 1
138                IF( MODKM1.LT.0 )
139       $            MODKM1 = HBL - 1
140     10 CONTINUE
141  
142  *     SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND
143  
144        IF( IBUF1.GT.0 ) THEN
145            CALL DGESD2D( CONTXT , IBUF1 , 1 , BUF( ISTR1 + 1 ) , IBUF1 , DOWN ,
146       $    RIGHT )
147        END IF
148        IF( IBUF2.GT.0 ) THEN
149            CALL DGESD2D( CONTXT , IBUF2 , 1 , BUF( ISTR2 + 1 ) , IBUF2 , MYROW ,
150       $    RIGHT )
151        END IF
152  
153  *     RECEIVE APPROPRIATE DATA IF THERE IS ANY
154  
155        IF( IRCV1.GT.0 ) THEN
156            CALL DGERV2D( CONTXT , IRCV1 , 1 , BUF( ISTR1 + 1 ) , IRCV1 , UP ,
157       $    LEFT )
158        END IF
159        IF( IRCV2.GT.0 ) THEN
160            CALL DGERV2D( CONTXT , IRCV2 , 1 , BUF( ISTR2 + 1 ) , IRCV2 , MYROW ,
161       $    LEFT )
162        END IF
163  
164  *     START MAIN LOOP
165  
166        IBUF1 = 0
167        IBUF2 = 0
168        CALL INFOG2L( I , I , DESCA , NPROW , NPCOL , MYROW , MYCOL , IROW1 ,
169       $ICOL1 , II , JJ )
170        MODKM1 = MOD( I - 1 + HBL , HBL )
171  
172  *     LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT.
173  
174  *     Start loop for subdiagonal search
175  
176        DO 40 K = I , L + 1 , - 1
177            IF(( MYROW.EQ.II ) .AND.( MYCOL.EQ.JJ ) ) THEN
178                IF( MODKM1.EQ.0 ) THEN
179  
180  *                 Grab information from WORK array
181  
182                    IF( NUM.GT.1 ) THEN
183                        IBUF1 = IBUF1 + 1
184                        H11 = BUF( ISTR1 + IBUF1 )
185                    ELSE
186                        H11 = A(( ICOL1 - 2 )*LDA + IROW1 - 1 )
187                    END IF
188                    IF( NPCOL.GT.1 ) THEN
189                        IBUF2 = IBUF2 + 1
190                        H10 = BUF( ISTR2 + IBUF2 )
191                    ELSE
192                        H10 = A(( ICOL1 - 2 )*LDA + IROW1 )
193                    END IF
194                ELSE
195  
196  *                 Information is local
197  
198                    H11 = A(( ICOL1 - 2 )*LDA + IROW1 - 1 )
199                    H10 = A(( ICOL1 - 2 )*LDA + IROW1 )
200                END IF
201                H22 = A(( ICOL1 - 1 )*LDA + IROW1 )
202                TST1 = ABS( H11 ) + ABS( H22 )
203                IF( TST1.EQ.ZERO ) THEN
204  
205  *                 FIND SOME NORM OF THE LOCAL H(L : I , L : I)
206  
207                    CALL INFOG1L( L , HBL , NPROW , MYROW , 0 , ITMP1 , III )
208                    IROW2 = NUMROC( I , HBL , MYROW , 0 , NPROW )
209                    CALL INFOG1L( L , HBL , NPCOL , MYCOL , 0 , ITMP2 , III )
210                    ICOL2 = NUMROC( I , HBL , MYCOL , 0 , NPCOL )
211                    DO 30 III = ITMP1 , IROW2
212                        DO 20 JJJ = ITMP2 , ICOL2
213                            TST1 = TST1 + ABS( A(( JJJ - 1 )*LDA + III ) )
214     20                 CONTINUE
215     30             CONTINUE
216                END IF
217                IF( ABS( H10 ).LE.MAX( ULP*TST1 , SMLNUM ) )
218       $            GO TO 50
219                    IROW1 = IROW1 - 1
220                    ICOL1 = ICOL1 - 1
221                END IF
222                MODKM1 = MODKM1 - 1
223                IF( MODKM1.LT.0 )
224       $            MODKM1 = HBL - 1
225                    IF(( MODKM1.EQ.HBL - 1 ) .AND.( K.GT.2 ) ) THEN
226                        II = MOD( II + NPROW - 1 , NPROW )
227                        JJ = MOD( JJ + NPCOL - 1 , NPCOL )
228                        CALL INFOG2L( K - 1 , K - 1 , DESCA , NPROW , NPCOL , MYROW , MYCOL ,
229       $                IROW1 , ICOL1 , ITMP1 , ITMP2 )
230                    END IF
231     40 CONTINUE
232     50 CONTINUE
233        CALL IGAMX2D( CONTXT , 'ALL' , ' ' , 1 , 1 , K , 1 , ITMP1 , ITMP2 , - 1 ,
234       $- 1 , - 1 )
235        RETURN
236  
237  *     End of PDLASMSUB
238  
239        END