SRC\pslahrd.f

#lines: 287   size: 10 Kb   creation: 29/03/2007 01:44:42   last modification: 08/05/2008 18:38:04   attribute: ARCH    Find   Reload  
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
178:
179:
180:
181:
182:
183:
184:
185:
186:
187:
188:
189:
190:
191:
192:
193:
194:
195:
196:
197:
198:
199:
200:
201:
202:
203:
204:
205:
206:
207:
208:
209:
210:
211:
212:
213:
214:
215:
216:
217:
218:
219:
220:
221:
222:
223:
224:
225:
226:
227:
228:
229:
230:
231:
232:
233:
234:
235:
236:
237:
238:
239:
240:
241:
242:
243:
244:
245:
246:
247:
248:
249:
250:
251:
252:
253:
254:
255:
256:
257:
258:
259:
260:
261:
262:
263:
264:
265:
266:
267:
268:
269:
270:
271:
272:
273:
274:
275:
276:
277:
278:
279:
280:
281:
282:
283:
284:
285:
286:
287:
      SUBROUTINE PSLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY,
     $                    DESCY, WORK )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     January 30, 2006
*
*     .. Scalar Arguments ..
      INTEGER             IA, IY, JA, JY, K, N, NB
*     ..
*     .. Array Arguments ..
      INTEGER             DESCA( * ), DESCY( * )
      REAL                A( * ), T( * ), TAU( * ), WORK( * ), Y( * )
*     ..
*
*  Purpose
*  =======
*
*  PSLAHRD reduces the first NB columns of a real general N-by-(N-K+1)
*  distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that elements below the
*  k-th subdiagonal are zero. The reduction is performed by an orthogo-
*  nal similarity transformation Q' * A * Q. The routine returns the
*  matrices V and T which determine Q as a block reflector I - V*T*V',
*  and also the matrix Y = A * V * T.
*
*  This is an auxiliary routine called by PSGEHRD. In the following
*  comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
*
*  Arguments
*  =========
*
*  N       (global input) INTEGER
*          The number of rows and columns to be operated on, i.e. the
*          order of the distributed submatrix sub( A ).
*          N >= 0.
*
*  K       (global input) INTEGER
*          The offset for the reduction. Elements below the k-th
*          subdiagonal in the first NB columns are reduced to zero.
*
*  NB      (global input) INTEGER
*          The number of columns to be reduced.
*
*  A       (local input/local output) REAL pointer into
*          the local memory to an array of dimension (LLD_A,
*          LOCc(JA+N-K)). On entry, this array contains the the local
*          pieces of the N-by-(N-K+1) general distributed matrix
*          A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above
*          the k-th subdiagonal in the first NB columns are overwritten
*          with the corresponding elements of the reduced distributed
*          matrix; the elements below the k-th subdiagonal, with the
*          array TAU, represent the matrix Q as a product of elementary
*          reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are
*          unchanged. See Further Details.
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  TAU     (local output) REAL array, dimension LOCc(JA+N-2)
*          The scalar factors of the elementary reflectors (see Further
*          Details). TAU is tied to the distributed matrix A.
*
*  T       (local output) REAL array, dimension (NB_A,NB_A)
*          The upper triangular matrix T.
*
*  Y       (local output) REAL pointer into the local memory
*          to an array of dimension (LLD_Y,NB_A). On exit, this array
*          contains the local pieces of the N-by-NB distributed
*          matrix Y. LLD_Y >= LOCr(IA+N-1).
*
*  IY      (global input) INTEGER
*          The row index in the global array Y indicating the first
*          row of sub( Y ).
*
*  JY      (global input) INTEGER
*          The column index in the global array Y indicating the
*          first column of sub( Y ).
*
*  DESCY   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Y.
*
*  WORK    (local workspace) REAL array, dimension (NB)
*
*  Further Details
*  ===============
*
*  The matrix Q is represented as a product of nb elementary reflectors
*
*     Q = H(1) H(2) . . . H(nb).
*
*  Each H(i) has the form
*
*     H(i) = I - tau * v * v'
*
*  where tau is a real scalar, and v is a real vector with
*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
*  A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
*
*  The elements of the vectors v together form the (n-k+1)-by-nb matrix
*  V which is needed, with T and Y, to apply the transformation to the
*  unreduced part of the matrix, using an update of the form:
*  A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V').
*
*  The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the
*  following example with n = 7, k = 3 and nb = 2:
*
*     ( a   h   a   a   a )
*     ( a   h   a   a   a )
*     ( a   h   a   a   a )
*     ( h   h   a   a   a )
*     ( v1  h   a   a   a )
*     ( v1  v2  a   a   a )
*     ( v1  v2  a   a   a )
*
*  where a denotes an element of the original matrix
*  A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper
*  Hessenberg matrix H, and vi denotes an element of the vector
*  defining H(i).
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      REAL               ONE, ZERO
      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            IPROC
      INTEGER            I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL,
     $                   JT, JW, L, MYROW, MYCOL, NPCOL, NPROW, NQ
      REAL               EI
*     ..
*     .. Local Arrays ..
      INTEGER            DESCW( DLEN_ )
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DESCSET, INFOG2L, PSELSET,
     $                   PSGEMV, PSLARFG, PSSCAL, SAXPY,
     $                   SCOPY, SSCAL, STRMV
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN, MOD
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible
*
      IF( N.LE.1 )
     $   RETURN
*
      ICTXT = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
      IOFF = MOD( JA-1, DESCA( NB_ ) )
      CALL INFOG2L( IA+K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II,
     $              JJ, IAROW, IACOL )
*
      IPROC = ( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL )
      NQ = NUMROC( N+JA-1, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
      IF( MYCOL.EQ.IACOL )
     $   NQ = NQ - IOFF
*
      EI = ZERO

      JW = IOFF + 1
      CALL DESCSET( DESCW, 1, DESCA( MB_ ), 1, DESCA( MB_ ), IAROW,
     $              IACOL, ICTXT, 1 )
*
      DO 10 L = 1, NB
         I = IA + K + L - 2
         J = JA + L - 1
*
         IF( L.GT.1 ) THEN
*
*           Update A(ia:ia+n-1,j)
*
*           Compute i-th column of A - Y * V'
*
            CALL PSGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY,
     $                   A, I, JA, DESCA, DESCA( M_ ), ONE, A, IA, J,
     $                   DESCA, 1 )
*
*           Apply I - V * T' * V' to this column (call it b) from the
*           left, using the last column of T as workspace
*
*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
*                    ( V2 )             ( b2 )
*
*           where V1 is unit lower triangular
*
*           w := V1' * b1
*
            IF( IPROC ) THEN
               CALL SCOPY( L-1, A( (JJ+L-2)*DESCA( LLD_ )+II ), 1,
     $                     WORK( JW ), 1 )
               CALL STRMV( 'Lower', 'Transpose', 'Unit', L-1,
     $                     A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ),
     $                     WORK( JW ), 1 )
            END IF
*
*           w := w + V2'*b2
*
            CALL PSGEMV( 'Transpose', N-K-L+1, L-1, ONE, A, I+1, JA,
     $                   DESCA, A, I+1, J, DESCA, 1, ONE, WORK, 1, JW,
     $                   DESCW, DESCW( M_ ) )
*
*           w := T'*w
*
            IF( IPROC )
     $         CALL STRMV( 'Upper', 'Transpose', 'Non-unit', L-1, T,
     $                     DESCA( NB_ ), WORK( JW ), 1 )
*
*           b2 := b2 - V2*w
*
            CALL PSGEMV( 'No transpose', N-K-L+1, L-1, -ONE, A, I+1, JA,
     $                   DESCA, WORK, 1, JW, DESCW, DESCW( M_ ), ONE,
     $                   A, I+1, J, DESCA, 1 )
*
*           b1 := b1 - V1*w
*
            IF( IPROC ) THEN
               CALL STRMV( 'Lower', 'No transpose', 'Unit', L-1,
     $                     A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ),
     $                     WORK( JW ), 1 )
               CALL SAXPY( L-1, -ONE, WORK( JW ), 1,
     $                     A( ( JJ+L-2 )*DESCA( LLD_ )+II ), 1 )
            END IF
            CALL PSELSET( A, I, J-1, DESCA, EI )
         END IF
*
*        Generate the elementary reflector H(i) to annihilate
*        A(ia+k+i:ia+n-1,j)
*
         CALL PSLARFG( N-K-L+1, EI, I+1, J, A, MIN( I+2, N+IA-1 ), J,
     $                 DESCA, 1, TAU )
         CALL PSELSET( A, I+1, J, DESCA, ONE )
*
*        Compute  Y(iy:y+n-1,jy+l-1)
*
         CALL PSGEMV( 'No transpose', N, N-K-L+1, ONE, A, IA, J+1,
     $                DESCA, A, I+1, J, DESCA, 1, ZERO, Y, IY, JY+L-1,
     $                DESCY, 1 )
         CALL PSGEMV( 'Transpose', N-K-L+1, L-1, ONE, A, I+1, JA, DESCA,
     $                A, I+1, J, DESCA, 1, ZERO, WORK, 1, JW, DESCW,
     $                DESCW( M_ ) )
         CALL PSGEMV( 'No transpose', N, L-1, -ONE, Y, IY, JY, DESCY,
     $                WORK, 1, JW, DESCW, DESCW( M_ ), ONE, Y, IY,
     $                JY+L-1, DESCY, 1 )
         JL = MIN( JJ+L-1, JA+NQ-1 )
         CALL PSSCAL( N, TAU( JL ), Y, IY, JY+L-1, DESCY, 1 )
*
*        Compute T(1:i,i)
*
         IF( IPROC ) THEN
            JT = ( L-1 ) * DESCA( NB_ )
            CALL SSCAL( L-1, -TAU( JL ), WORK( JW ), 1 )
            CALL SCOPY( L-1, WORK( JW ), 1, T( JT+1 ), 1 )
            CALL STRMV( 'Upper', 'No transpose', 'Non-unit', L-1, T,
     $                  DESCA( NB_ ), T( JT+1 ), 1 )
            T( JT+L ) = TAU( JL )
         END IF
   10 CONTINUE
*
      CALL PSELSET( A, K+NB+IA-1, J, DESCA, EI )
*
      RETURN
*
*     End of PSLAHRD
*
      END