SRC\slamsh.f

#lines: 236   size: 8 Kb   creation: 18/01/2006 23:36:04   last modification: 08/05/2008 18:38:22   attribute: ARCH    Find   Reload  
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
178:
179:
180:
181:
182:
183:
184:
185:
186:
187:
188:
189:
190:
191:
192:
193:
194:
195:
196:
197:
198:
199:
200:
201:
202:
203:
204:
205:
206:
207:
208:
209:
210:
211:
212:
213:
214:
215:
216:
217:
218:
219:
220:
221:
222:
223:
224:
225:
226:
227:
228:
229:
230:
231:
232:
233:
234:
235:
236:
      SUBROUTINE SLAMSH ( S, LDS, NBULGE, JBLK, H, LDH, N, ULP )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            LDS, NBULGE, JBLK, LDH, N
      REAL               ULP
*     ..
*     .. Array Arguments ..
      REAL               S(LDS,*), H(LDH,*)
*     ..
*
*  Purpose
*  =======
*
*  SLAMSH sends multiple shifts through a small (single node) matrix to
*     see how consecutive small subdiagonal elements are modified by 
*     subsequent shifts in an effort to maximize the number of bulges 
*     that can be sent through.
*  SLAMSH should only be called when there are multiple shifts/bulges 
*     (NBULGE > 1) and the first shift is starting in the middle of an
*     unreduced Hessenberg matrix because of two or more consecutive small
*     subdiagonal elements.
*
*  Arguments
*  =========
*
*  S       (local input/output) REAL array, (LDS,*)
*          On entry, the matrix of shifts.  Only the 2x2 diagonal of S is
*             referenced.  It is assumed that S has JBLK double shifts
*             (size 2).
*          On exit, the data is rearranged in the best order for 
*             applying.
*
*  LDS     (local input) INTEGER
*          On entry, the leading dimension of S.  Unchanged on exit.
*              1 < NBULGE <= JBLK <= LDS/2
*
*  NBULGE  (local input/output) INTEGER
*          On entry, the number of bulges to send through H ( >1 ).
*              NBULGE should be less than the maximum determined (JBLK).
*              1 < NBULGE <= JBLK <= LDS/2
*          On exit, the maximum number of bulges that can be sent 
*              through.
*
*  JBLK    (local input) INTEGER
*          On entry, the number of shifts determined for S.
*          Unchanged on exit.
*
*  H       (local input/output) REAL array (LDH,N)
*          On entry, the local matrix to apply the shifts on.
*              H should be aligned so that the starting row is 2.
*          On exit, the data is destroyed.
*
*  LDS     (local input) INTEGER
*          On entry, the leading dimension of S.  Unchanged on exit.
*
*  N       (local input) INTEGER
*          On entry, the size of H.  If all the bulges are expected to
*              go through, N should be at least 4*NBULGE+2.
*              Otherwise, NBULGE may be reduced by this routine.
*
*  ULP     (local input) REAL            
*          On entry, machine precision
*          Unchanged on exit.
*
*  Implemented by:  G. Henry, May 1, 1997
*
*  =====================================================================
*
*     .. Parameters ..
      REAL             ZERO, TEN
      PARAMETER ( ZERO = 0.0E+0, TEN = 10.0E+0 )
*     ..
*     .. Local Scalars ..
      INTEGER          K, IBULGE, M, NR, J, IVAL, I
      REAL             H44, H33, H43H34, H11, H22, H21, H12, H44S,
     $                 H33S, V1, V2, V3, H00, H10, TST1, T1, T2, T3,
     $                 SUM, S1, DVAL
*     ..
*     .. Local Arrays ..
      REAL             V(3)
*     ..
*     .. External Subroutines ..
      EXTERNAL         SLARFG, SCOPY
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC        MAX, ABS
*     ..
*     .. Executable Statements ..
*
      M = 2
      DO 10 IBULGE = 1, NBULGE
         H44 = S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2)
         H33 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1)
         H43H34 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2)*
     $            S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1)
         H11 = H( M, M )
         H22 = H( M+1, M+1 )
         H21 = H( M+1, M )
         H12 = H( M, M+1 )
         H44S = H44 - H11
         H33S = H33 - H11
         V1 = ( H33S*H44S-H43H34 ) / H21 + H12
         V2 = H22 - H11 - H33S - H44S
         V3 = H( M+2, M+1 )
         S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 )
         V1 = V1 / S1
         V2 = V2 / S1
         V3 = V3 / S1
         V( 1 ) = V1
         V( 2 ) = V2
         V( 3 ) = V3
         H00 = H( M-1, M-1 )
         H10 = H( M, M-1 )
         TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) )
         IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).GT.ULP*TST1 ) THEN
*           Find minimum
            DVAL = (ABS(H10)*(ABS(V2)+ABS(V3))) / (ULP*TST1)   
            IVAL = IBULGE
            DO 15 I = IBULGE+1, NBULGE
               H44 = S(2*JBLK-2*I+2, 2*JBLK-2*I+2)
               H33 = S(2*JBLK-2*I+1,2*JBLK-2*I+1)
               H43H34 = S(2*JBLK-2*I+1,2*JBLK-2*I+2)*
     $                  S(2*JBLK-2*I+2, 2*JBLK-2*I+1)
               H11 = H( M, M )
               H22 = H( M+1, M+1 )
               H21 = H( M+1, M )
               H12 = H( M, M+1 )
               H44S = H44 - H11
               H33S = H33 - H11
               V1 = ( H33S*H44S-H43H34 ) / H21 + H12
               V2 = H22 - H11 - H33S - H44S
               V3 = H( M+2, M+1 )
               S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 )
               V1 = V1 / S1
               V2 = V2 / S1
               V3 = V3 / S1
               V( 1 ) = V1
               V( 2 ) = V2
               V( 3 ) = V3
               H00 = H( M-1, M-1 )
               H10 = H( M, M-1 )
               TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) )
               IF ( (DVAL.GT.(ABS(H10)*(ABS(V2)+ABS(V3)))/(ULP*TST1))
     $             .AND. ( DVAL .GT. 1.D0 ) ) THEN
                  DVAL = (ABS(H10)*(ABS(V2)+ABS(V3))) / (ULP*TST1)   
                  IVAL = I
               END IF
  15        CONTINUE
            IF ( (DVAL .LT. TEN) .AND. (IVAL .NE. IBULGE) ) THEN
               H44 = S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+2)
               H33 = S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+1)
               H43H34 = S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+2)
               H10 =    S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1)
               S(2*JBLK-2*IVAL+2,2*JBLK-2*IVAL+2) = 
     $              S(2*JBLK-2*IBULGE+2,2*JBLK-2*IBULGE+2)
               S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+1) =
     $              S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1)
               S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+2) =
     $              S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2) 
               S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1) =
     $              S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) 
               S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2) = H44
               S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) = H33
               S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2) = H43H34
               S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) = H10
            END IF
            H44 = S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2)
            H33 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1)
            H43H34 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2)*
     $               S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1)
            H11 = H( M, M )
            H22 = H( M+1, M+1 )
            H21 = H( M+1, M )
            H12 = H( M, M+1 )
            H44S = H44 - H11
            H33S = H33 - H11
            V1 = ( H33S*H44S-H43H34 ) / H21 + H12
            V2 = H22 - H11 - H33S - H44S
            V3 = H( M+2, M+1 )
            S1 = ABS( V1 ) + ABS( V2 ) + ABS( V3 )
            V1 = V1 / S1
            V2 = V2 / S1
            V3 = V3 / S1
            V( 1 ) = V1
            V( 2 ) = V2
            V( 3 ) = V3
            H00 = H( M-1, M-1 )
            H10 = H( M, M-1 )
            TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) )
         END IF
         IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).GT.TEN*ULP*TST1 ) THEN
*           IBULGE better not be 1 here or we have a bug!
            NBULGE = MAX(IBULGE -1,1)
            RETURN
         END IF
         DO 120 K = M, N - 1
            NR = MIN( 3, N-K+1 )
            IF( K.GT.M )
     $         CALL SCOPY( NR, H( K, K-1 ), 1, V, 1 )
            CALL SLARFG( NR, V( 1 ), V( 2 ), 1, T1 )
            IF( K.GT.M ) THEN
               H( K, K-1 ) = V( 1 )
               H( K+1, K-1 ) = ZERO
               IF( K.LT.N-1 )
     $            H( K+2, K-1 ) = ZERO
            ELSE
               H( K, K-1 ) = -H( K, K-1 )
            END IF
            V2 = V( 2 )
            T2 = T1*V2
            IF( NR.EQ.3 ) THEN
               V3 = V( 3 )
               T3 = T1*V3
               DO 60 J = K, N
                  SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J )
                  H( K, J ) = H( K, J ) - SUM*T1
                  H( K+1, J ) = H( K+1, J ) - SUM*T2
                  H( K+2, J ) = H( K+2, J ) - SUM*T3
   60          CONTINUE
               DO 70 J = 1, MIN( K+3, N )
                  SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 )
                  H( J, K ) = H( J, K ) - SUM*T1
                  H( J, K+1 ) = H( J, K+1 ) - SUM*T2
                  H( J, K+2 ) = H( J, K+2 ) - SUM*T3
   70          CONTINUE
            END IF
  120    CONTINUE
   10 CONTINUE
* 
      RETURN
      END