Routine: PCLAUUM()  File: SRC\pclauum.f

 
 
# lines: 220
  # code: 220
  # comment: 0
  # blank:0
# Variables:23
# Callers:1
# Callings:1
# Words:83
# Keywords:52
 

 

..
     .. Array Arguments ..
     ..
  Purpose
  =======
  PCLAUUM computes the product U * U' or L' * L, where the triangular
  factor U or L is stored in the upper or lower triangular part of
  the distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
  overwriting the factor U in sub( A ).
  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
  overwriting the factor L in sub( A ).
  This is the blocked form of the algorithm, calling Level 3 PBLAS.
  Notes
  =====
  Each global data object is described by an associated description
  vector.  This vector stores the information required to establish
  the mapping between an object element and its corresponding process
  and memory location.
  Let A be a generic term for any 2D block cyclicly distributed array.
  Such a global array has an associated description vector DESCA.
  In the following comments, the character _ should be read as
  "of the global array".
  NOTATION        STORED IN      EXPLANATION
  --------------- -------------- --------------------------------------
  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
                                 DTYPE_A = 1.
  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
                                 the BLACS process grid A is distribu-
                                 ted over. The context itself is glo-
                                 bal, but the handle (the integer
                                 value) may vary.
  M_A    (global) DESCA( M_ )    The number of rows in the global
                                 array A.
  N_A    (global) DESCA( N_ )    The number of columns in the global
                                 array A.
  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
                                 the rows of the array.
  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
                                 the columns of the array.
  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
                                 row of the array A is distributed.
  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
                                 first column of the array A is
                                 distributed.
  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
  Let K be the number of rows or columns of a distributed matrix,
  and assume that its process grid has dimension p x q.
  LOCr( K ) denotes the number of elements of K that a process
  would receive if K were distributed over the p processes of its
  process column.
  Similarly, LOCc( K ) denotes the number of elements of K that a
  process would receive if K were distributed over the q processes of
  its process row.
  The values of LOCr() and LOCc() may be determined via a call to the
  ScaLAPACK tool function, NUMROC:
          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
  An upper bound for these quantities may be computed by:
          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
  Arguments
  =========
  UPLO    (global input) CHARACTER*1
          Specifies whether the triangular factor stored in the
          distributed matrix sub( A ) is upper or lower triangular:
          = 'U':  Upper triangular
          = 'L':  Lower triangular
  N       (global input) INTEGER
          The number of rows and columns to be operated on, i.e. the
          order of the triangular factor U or L. N >= 0.
  A       (local input/local output) COMPLEX pointer into the
          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
          On entry, the local pieces of the triangular factor L or U.
          On exit, if UPLO = 'U', the upper triangle of the distributed
          matrix sub( A ) is overwritten with the upper triangle of the
          product U * U'; if UPLO = 'L', the lower triangle of sub( A )
          is overwritten with the lower triangle of the product L' * L.
  IA      (global input) INTEGER
          The row index in the global array A indicating the first
          row of sub( A ).
  JA      (global input) INTEGER
          The column index in the global array A indicating the
          first column of sub( A ).
  DESCA   (global and local input) INTEGER array of dimension DLEN_.
          The array descriptor for the distributed matrix A.
  =====================================================================
     .. Parameters ..

 
Display dynamic version Find AutoScroll Reload FontSize: - + Hide Comments Hide Blanks Frame FullScreen MailPrint

 
001        SUBROUTINE PCLAUUM( UPLO , N , A , IA , JA , DESCA )
002  
003  *     -- ScaLAPACK auxiliary routine(version 1.7) --
004  *     University of Tennessee , Knoxville , Oak Ridge National Laboratory ,
005  *     and University of California , Berkeley.
006  *     May 1 , 1997
007  
008  *     .. Scalar Arguments ..
009        CHARACTER UPLO
010        INTEGER IA , JA , N
011        INTEGER BLOCK_CYCLIC_2D , CSRC_ , CTXT_ , DLEN_ , DTYPE_ ,
012       $LLD_ , MB_ , M_ , NB_ , N_ , RSRC_
013        PARAMETER( BLOCK_CYCLIC_2D = 1 , DLEN_ = 9 , DTYPE_ = 1 ,
014       $CTXT_ = 2 , M_ = 3 , N_ = 4 , MB_ = 5 , NB_ = 6 ,
015       $RSRC_ = 7 , CSRC_ = 8 , LLD_ = 9 )
016        REAL ONE
017        PARAMETER( ONE = 1.0E + 0 )
018        COMPLEX CONE
019        PARAMETER( CONE = 1.0E + 0 )
020  *     ..
021  *     .. Local Scalars ..
022        INTEGER I , J , JB , JN
023  *     ..
024  *     .. External Subroutines ..
025        EXTERNAL PCGEMM , PCHERK , PCLAUU2 , PCTRMM
026  *     ..
027  *     .. External Functions ..
028        LOGICAL LSAME
029        INTEGER ICEIL
030        EXTERNAL ICEIL , LSAME
031  *     ..
032  *     .. Intrinsic Functions ..
033        INTRINSIC MIN
034  *     ..
035  *     .. Executable Statements ..
036  
037  *     Quick return if possible
038  
039        IF( N.EQ.0 )
040       $    RETURN
041  
042            JN = MIN( ICEIL( JA , DESCA( NB_ ) ) * DESCA( NB_ ) , JA + N - 1 )
043            IF( LSAME( UPLO , 'U' ) ) THEN
044  
045  *             Compute the product U * U'.
046  
047  *             Handle first block separately
048  
049                JB = JN - JA + 1
050                CALL PCLAUU2 ( 'Upper' , JB , A , IA , JA , DESCA )
051                IF( JB.LE.N - 1 ) THEN
052                    CALL PCHERK( 'Upper' , 'No transpose' , JB , N - JB , ONE , A , IA ,
053       $            JA + JB , DESCA , ONE , A , IA , JA , DESCA )
054                END IF
055  
056  *             Loop over remaining block of columns
057  
058                DO 10 J = JN + 1 , JA + N - 1 , DESCA( NB_ )
059                    JB = MIN( N - J + JA , DESCA( NB_ ) )
060                    I = IA + J - JA
061                    CALL PCTRMM( 'Right' , 'Upper' , 'Conjugate transpose' ,
062       $            'Non - unit' , J - JA , JB , CONE , A , I , J , DESCA ,
063       $            A , IA , J , DESCA )
064                    CALL PCLAUU2 ( 'Upper' , JB , A , I , J , DESCA )
065                    IF( J + JB.LE.JA + N - 1 ) THEN
066                        CALL PCGEMM( 'No transpose' , 'Conjugate transpose' ,
067       $                J - JA , JB , N - J - JB + JA , CONE , A , IA , J + JB ,
068       $                DESCA , A , I , J + JB , DESCA , CONE , A , IA ,
069       $                J , DESCA )
070                        CALL PCHERK( 'Upper' , 'No transpose' , JB , N - J - JB + JA , ONE ,
071       $                A , I , J + JB , DESCA , ONE , A , I , J , DESCA )
072                    END IF
073     10         CONTINUE
074            ELSE
075  
076  *             Compute the product L' * L.
077  
078  *             Handle first block separately
079  
080                JB = JN - JA + 1
081                CALL PCLAUU2 ( 'Lower' , JB , A , IA , JA , DESCA )
082                IF( JB.LE.N - 1 ) THEN
083                    CALL PCHERK( 'Lower' , 'Conjugate transpose' , JB , N - JB , ONE ,
084       $            A , IA + JB , JA , DESCA , ONE , A , IA , JA , DESCA )
085                END IF
086  
087  *             Loop over remaining block of columns
088  
089                DO 20 J = JN + 1 , JA + N - 1 , DESCA( NB_ )
090                    JB = MIN( N - J + JA , DESCA( NB_ ) )
091                    I = IA + J - JA
092                    CALL PCTRMM( 'Left' , 'Lower' , 'Conjugate Transpose' ,
093       $            'Non - unit' , JB , J - JA , CONE , A , I , J , DESCA , A ,
094       $            I , JA , DESCA )
095                    CALL PCLAUU2 ( 'Lower' , JB , A , I , J , DESCA )
096                    IF( J + JB.LE.JA + N - 1 ) THEN
097                        CALL PCGEMM( 'Conjugate transpose' , 'No transpose' , JB ,
098       $                J - JA , N - J - JB + JA , CONE , A , I + JB , J , DESCA ,
099       $                A , I + JB , JA , DESCA , CONE , A , I , JA , DESCA )
100                        CALL PCHERK( 'Lower' , 'Conjugate transpose' , JB ,
101       $                N - J - JB + JA , ONE , A , I + JB , J , DESCA , ONE ,
102       $                A , I , J , DESCA )
103                    END IF
104     20         CONTINUE
105            END IF
106  
107            RETURN
108  
109  *         End of PCLAUUM
110  
111        END