Routine: PSLAUUM()  File: SRC\pslauum.f

 
 
# lines: 215
  # code: 215
  # comment: 0
  # blank:0
# Variables:22
# Callers:1
# Callings:1
# Words:81
# Keywords:51
 

 

..
     .. Array Arguments ..
     ..
  Purpose
  =======
  PSLAUUM computes the product U * U' or L' * L, where the triangular
  factor U or L is stored in the upper or lower triangular part of
  the distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
  overwriting the factor U in sub( A ).
  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
  overwriting the factor L in sub( A ).
  This is the blocked form of the algorithm, calling Level 3 PBLAS.
  Notes
  =====
  Each global data object is described by an associated description
  vector.  This vector stores the information required to establish
  the mapping between an object element and its corresponding process
  and memory location.
  Let A be a generic term for any 2D block cyclicly distributed array.
  Such a global array has an associated description vector DESCA.
  In the following comments, the character _ should be read as
  "of the global array".
  NOTATION        STORED IN      EXPLANATION
  --------------- -------------- --------------------------------------
  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
                                 DTYPE_A = 1.
  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
                                 the BLACS process grid A is distribu-
                                 ted over. The context itself is glo-
                                 bal, but the handle (the integer
                                 value) may vary.
  M_A    (global) DESCA( M_ )    The number of rows in the global
                                 array A.
  N_A    (global) DESCA( N_ )    The number of columns in the global
                                 array A.
  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
                                 the rows of the array.
  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
                                 the columns of the array.
  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
                                 row of the array A is distributed.
  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
                                 first column of the array A is
                                 distributed.
  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
  Let K be the number of rows or columns of a distributed matrix,
  and assume that its process grid has dimension p x q.
  LOCr( K ) denotes the number of elements of K that a process
  would receive if K were distributed over the p processes of its
  process column.
  Similarly, LOCc( K ) denotes the number of elements of K that a
  process would receive if K were distributed over the q processes of
  its process row.
  The values of LOCr() and LOCc() may be determined via a call to the
  ScaLAPACK tool function, NUMROC:
          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
  An upper bound for these quantities may be computed by:
          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
  Arguments
  =========
  UPLO    (global input) CHARACTER*1
          Specifies whether the triangular factor stored in the
          distributed matrix sub( A ) is upper or lower triangular:
          = 'U':  Upper triangular
          = 'L':  Lower triangular
  N       (global input) INTEGER
          The number of rows and columns to be operated on, i.e. the
          order of the triangular factor U or L. N >= 0.
  A       (local input/local output) REAL pointer into the
          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
          On entry, the local pieces of the triangular factor L or U.
          On exit, if UPLO = 'U', the upper triangle of the distributed
          matrix sub( A ) is overwritten with the upper triangle of the
          product U * U'; if UPLO = 'L', the lower triangle of sub( A )
          is overwritten with the lower triangle of the product L' * L.
  IA      (global input) INTEGER
          The row index in the global array A indicating the first
          row of sub( A ).
  JA      (global input) INTEGER
          The column index in the global array A indicating the
          first column of sub( A ).
  DESCA   (global and local input) INTEGER array of dimension DLEN_.
          The array descriptor for the distributed matrix A.
  =====================================================================
     .. Parameters ..

 
Display dynamic version Find AutoScroll Reload FontSize: - + Hide Comments Hide Blanks Frame FullScreen MailPrint

 
001        SUBROUTINE PSLAUUM( UPLO , N , A , IA , JA , DESCA )
002  
003  *     -- ScaLAPACK auxiliary routine(version 1.7) --
004  *     University of Tennessee , Knoxville , Oak Ridge National Laboratory ,
005  *     and University of California , Berkeley.
006  *     May 1 , 1997
007  
008  *     .. Scalar Arguments ..
009        CHARACTER UPLO
010        INTEGER IA , JA , N
011        INTEGER BLOCK_CYCLIC_2D , CSRC_ , CTXT_ , DLEN_ , DTYPE_ ,
012       $LLD_ , MB_ , M_ , NB_ , N_ , RSRC_
013        PARAMETER( BLOCK_CYCLIC_2D = 1 , DLEN_ = 9 , DTYPE_ = 1 ,
014       $CTXT_ = 2 , M_ = 3 , N_ = 4 , MB_ = 5 , NB_ = 6 ,
015       $RSRC_ = 7 , CSRC_ = 8 , LLD_ = 9 )
016        REAL ONE
017        PARAMETER( ONE = 1.0E + 0 )
018  *     ..
019  *     .. Local Scalars ..
020        INTEGER I , J , JB , JN
021  *     ..
022  *     .. External Subroutines ..
023        EXTERNAL PSGEMM , PSLAUU2 , PSTRMM , PSSYRK
024  *     ..
025  *     .. External Functions ..
026        LOGICAL LSAME
027        INTEGER ICEIL
028        EXTERNAL ICEIL , LSAME
029  *     ..
030  *     .. Intrinsic Functions ..
031        INTRINSIC MIN
032  *     ..
033  *     .. Executable Statements ..
034  
035  *     Quick return if possible
036  
037        IF( N.EQ.0 )
038       $    RETURN
039  
040            JN = MIN( ICEIL( JA , DESCA( NB_ ) ) * DESCA( NB_ ) , JA + N - 1 )
041            IF( LSAME( UPLO , 'U' ) ) THEN
042  
043  *             Compute the product U * U'.
044  
045  *             Handle first block separately
046  
047                JB = JN - JA + 1
048                CALL PSLAUU2 ( 'Upper' , JB , A , IA , JA , DESCA )
049                IF( JB.LE.N - 1 ) THEN
050                    CALL PSSYRK( 'Upper' , 'No transpose' , JB , N - JB , ONE , A , IA ,
051       $            JA + JB , DESCA , ONE , A , IA , JA , DESCA )
052                END IF
053  
054  *             Loop over remaining block of columns
055  
056                DO 10 J = JN + 1 , JA + N - 1 , DESCA( NB_ )
057                    JB = MIN( N - J + JA , DESCA( NB_ ) )
058                    I = IA + J - JA
059                    CALL PSTRMM( 'Right' , 'Upper' , 'Transpose' , 'Non - unit' ,
060       $            J - JA , JB , ONE , A , I , J , DESCA , A , IA , J ,
061       $            DESCA )
062                    CALL PSLAUU2 ( 'Upper' , JB , A , I , J , DESCA )
063                    IF( J + JB.LE.JA + N - 1 ) THEN
064                        CALL PSGEMM( 'No transpose' , 'Transpose' , J - JA , JB ,
065       $                N - J - JB + JA , ONE , A , IA , J + JB , DESCA , A , I ,
066       $                J + JB , DESCA , ONE , A , IA , J , DESCA )
067                        CALL PSSYRK( 'Upper' , 'No transpose' , JB , N - J - JB + JA , ONE ,
068       $                A , I , J + JB , DESCA , ONE , A , I , J , DESCA )
069                    END IF
070     10         CONTINUE
071            ELSE
072  
073  *             Compute the product L' * L.
074  
075  *             Handle first block separately
076  
077                JB = JN - JA + 1
078                CALL PSLAUU2 ( 'Lower' , JB , A , IA , JA , DESCA )
079                IF( JB.LE.N - 1 ) THEN
080                    CALL PSSYRK( 'Lower' , 'Transpose' , JB , N - JB , ONE , A , IA + JB ,
081       $            JA , DESCA , ONE , A , IA , JA , DESCA )
082                END IF
083  
084  *             Loop over remaining block of columns
085  
086                DO 20 J = JN + 1 , JA + N - 1 , DESCA( NB_ )
087                    JB = MIN( N - J + JA , DESCA( NB_ ) )
088                    I = IA + J - JA
089                    CALL PSTRMM( 'Left' , 'Lower' , 'Transpose' , 'Non - unit' , JB ,
090       $            J - JA , ONE , A , I , J , DESCA , A , I , JA , DESCA )
091                    CALL PSLAUU2 ( 'Lower' , JB , A , I , J , DESCA )
092                    IF( J + JB.LE.JA + N - 1 ) THEN
093                        CALL PSGEMM( 'Transpose' , 'No transpose' , JB , J - JA ,
094       $                N - J - JB + JA , ONE , A , I + JB , J , DESCA , A , I + JB ,
095       $                JA , DESCA , ONE , A , I , JA , DESCA )
096                        CALL PSSYRK( 'Lower' , 'Transpose' , JB , N - J - JB + JA , ONE ,
097       $                A , I + JB , J , DESCA , ONE , A , I , J , DESCA )
098                    END IF
099     20         CONTINUE
100            END IF
101  
102            RETURN
103  
104  *         End of PSLAUUM
105  
106        END