Télécharger dlaqr5.eso

Retour à la liste

Numérotation des lignes :

  1. C DLAQR5 SOURCE BP208322 20/09/18 21:16:03 10718
  2. *> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
  3. *
  4. * =========== DOCUMENTATION ===========
  5. *
  6. * Online html documentation available at
  7. * http://www.netlib.org/lapack/explore-html/
  8. *
  9. *> \htmlonly
  10. *> Download DLAQR5 + dependencies
  11. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
  12. *> [TGZ]</a>
  13. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
  14. *> [ZIP]</a>
  15. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
  16. *> [TXT]</a>
  17. *> \endhtmlonly
  18. *
  19. * Definition:
  20. * ===========
  21. *
  22. * SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
  23. * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
  24. * LDU, NV, WV, LDWV, NH, WH, LDWH )
  25. *
  26. * .. Scalar Arguments ..
  27. * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
  28. * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
  29. * LOGICAL WANTT, WANTZ
  30. * ..
  31. * .. Array Arguments ..
  32. * REAL*8 H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
  33. * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
  34. * $ Z( LDZ, * )
  35. * ..
  36. *
  37. *
  38. *> \par Purpose:
  39. * =============
  40. *>
  41. *> \verbatim
  42. *>
  43. *> DLAQR5, called by DLAQR0, performs a
  44. *> single small-bulge multi-shift QR sweep.
  45. *> \endverbatim
  46. *
  47. * Arguments:
  48. * ==========
  49. *
  50. *> \param[in] WANTT
  51. *> \verbatim
  52. *> WANTT is LOGICAL
  53. *> WANTT = .true. if the quasi-triangular Schur factor
  54. *> is being computed. WANTT is set to .false. otherwise.
  55. *> \endverbatim
  56. *>
  57. *> \param[in] WANTZ
  58. *> \verbatim
  59. *> WANTZ is LOGICAL
  60. *> WANTZ = .true. if the orthogonal Schur factor is being
  61. *> computed. WANTZ is set to .false. otherwise.
  62. *> \endverbatim
  63. *>
  64. *> \param[in] KACC22
  65. *> \verbatim
  66. *> KACC22 is INTEGER with value 0, 1, or 2.
  67. *> Specifies the computation mode of far-from-diagonal
  68. *> orthogonal updates.
  69. *> = 0: DLAQR5 does not accumulate reflections and does not
  70. *> use matrix-matrix multiply to update far-from-diagonal
  71. *> matrix entries.
  72. *> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
  73. *> multiply to update the far-from-diagonal matrix entries.
  74. *> = 2: DLAQR5 accumulates reflections, uses matrix-matrix
  75. *> multiply to update the far-from-diagonal matrix entries,
  76. *> and takes advantage of 2-by-2 block structure during
  77. *> matrix multiplies.
  78. *> \endverbatim
  79. *>
  80. *> \param[in] N
  81. *> \verbatim
  82. *> N is INTEGER
  83. *> N is the order of the Hessenberg matrix H upon which this
  84. *> subroutine operates.
  85. *> \endverbatim
  86. *>
  87. *> \param[in] KTOP
  88. *> \verbatim
  89. *> KTOP is INTEGER
  90. *> \endverbatim
  91. *>
  92. *> \param[in] KBOT
  93. *> \verbatim
  94. *> KBOT is INTEGER
  95. *> These are the first and last rows and columns of an
  96. *> isolated diagonal block upon which the QR sweep is to be
  97. *> applied. It is assumed without a check that
  98. *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
  99. *> and
  100. *> either KBOT = N or H(KBOT+1,KBOT) = 0.
  101. *> \endverbatim
  102. *>
  103. *> \param[in] NSHFTS
  104. *> \verbatim
  105. *> NSHFTS is INTEGER
  106. *> NSHFTS gives the number of simultaneous shifts. NSHFTS
  107. *> must be positive and even.
  108. *> \endverbatim
  109. *>
  110. *> \param[in,out] SR
  111. *> \verbatim
  112. *> SR is REAL*8 array, dimension (NSHFTS)
  113. *> \endverbatim
  114. *>
  115. *> \param[in,out] SI
  116. *> \verbatim
  117. *> SI is REAL*8 array, dimension (NSHFTS)
  118. *> SR contains the real parts and SI contains the imaginary
  119. *> parts of the NSHFTS shifts of origin that define the
  120. *> multi-shift QR sweep. On output SR and SI may be
  121. *> reordered.
  122. *> \endverbatim
  123. *>
  124. *> \param[in,out] H
  125. *> \verbatim
  126. *> H is REAL*8 array, dimension (LDH,N)
  127. *> On input H contains a Hessenberg matrix. On output a
  128. *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
  129. *> to the isolated diagonal block in rows and columns KTOP
  130. *> through KBOT.
  131. *> \endverbatim
  132. *>
  133. *> \param[in] LDH
  134. *> \verbatim
  135. *> LDH is INTEGER
  136. *> LDH is the leading dimension of H just as declared in the
  137. *> calling procedure. LDH.GE.MAX(1,N).
  138. *> \endverbatim
  139. *>
  140. *> \param[in] ILOZ
  141. *> \verbatim
  142. *> ILOZ is INTEGER
  143. *> \endverbatim
  144. *>
  145. *> \param[in] IHIZ
  146. *> \verbatim
  147. *> IHIZ is INTEGER
  148. *> Specify the rows of Z to which transformations must be
  149. *> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
  150. *> \endverbatim
  151. *>
  152. *> \param[in,out] Z
  153. *> \verbatim
  154. *> Z is REAL*8 array, dimension (LDZ,IHIZ)
  155. *> If WANTZ = .TRUE., then the QR Sweep orthogonal
  156. *> similarity transformation is accumulated into
  157. *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
  158. *> If WANTZ = .FALSE., then Z is unreferenced.
  159. *> \endverbatim
  160. *>
  161. *> \param[in] LDZ
  162. *> \verbatim
  163. *> LDZ is INTEGER
  164. *> LDA is the leading dimension of Z just as declared in
  165. *> the calling procedure. LDZ.GE.N.
  166. *> \endverbatim
  167. *>
  168. *> \param[out] V
  169. *> \verbatim
  170. *> V is REAL*8 array, dimension (LDV,NSHFTS/2)
  171. *> \endverbatim
  172. *>
  173. *> \param[in] LDV
  174. *> \verbatim
  175. *> LDV is INTEGER
  176. *> LDV is the leading dimension of V as declared in the
  177. *> calling procedure. LDV.GE.3.
  178. *> \endverbatim
  179. *>
  180. *> \param[out] U
  181. *> \verbatim
  182. *> U is REAL*8 array, dimension (LDU,3*NSHFTS-3)
  183. *> \endverbatim
  184. *>
  185. *> \param[in] LDU
  186. *> \verbatim
  187. *> LDU is INTEGER
  188. *> LDU is the leading dimension of U just as declared in the
  189. *> in the calling subroutine. LDU.GE.3*NSHFTS-3.
  190. *> \endverbatim
  191. *>
  192. *> \param[in] NH
  193. *> \verbatim
  194. *> NH is INTEGER
  195. *> NH is the number of columns in array WH available for
  196. *> workspace. NH.GE.1.
  197. *> \endverbatim
  198. *>
  199. *> \param[out] WH
  200. *> \verbatim
  201. *> WH is REAL*8 array, dimension (LDWH,NH)
  202. *> \endverbatim
  203. *>
  204. *> \param[in] LDWH
  205. *> \verbatim
  206. *> LDWH is INTEGER
  207. *> Leading dimension of WH just as declared in the
  208. *> calling procedure. LDWH.GE.3*NSHFTS-3.
  209. *> \endverbatim
  210. *>
  211. *> \param[in] NV
  212. *> \verbatim
  213. *> NV is INTEGER
  214. *> NV is the number of rows in WV agailable for workspace.
  215. *> NV.GE.1.
  216. *> \endverbatim
  217. *>
  218. *> \param[out] WV
  219. *> \verbatim
  220. *> WV is REAL*8 array, dimension (LDWV,3*NSHFTS-3)
  221. *> \endverbatim
  222. *>
  223. *> \param[in] LDWV
  224. *> \verbatim
  225. *> LDWV is INTEGER
  226. *> LDWV is the leading dimension of WV as declared in the
  227. *> in the calling subroutine. LDWV.GE.NV.
  228. *> \endverbatim
  229. *
  230. * Authors:
  231. * ========
  232. *
  233. *> \author Univ. of Tennessee
  234. *> \author Univ. of California Berkeley
  235. *> \author Univ. of Colorado Denver
  236. *> \author NAG Ltd.
  237. *
  238. *> \date June 2016
  239. *
  240. *> \ingroup doubleOTHERauxiliary
  241. *
  242. *> \par Contributors:
  243. * ==================
  244. *>
  245. *> Karen Braman and Ralph Byers, Department of Mathematics,
  246. *> University of Kansas, USA
  247. *
  248. *> \par References:
  249. * ================
  250. *>
  251. *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
  252. *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
  253. *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
  254. *> 929--947, 2002.
  255. *>
  256. * =====================================================================
  257. SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
  258. $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
  259. $ LDU, NV, WV, LDWV, NH, WH, LDWH )
  260. *
  261. * -- LAPACK auxiliary routine (version 3.7.1) --
  262. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  263. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  264. * June 2016
  265.  
  266. IMPLICIT INTEGER(I-N)
  267. IMPLICIT REAL*8(A-H,O-Z)
  268. *
  269. * .. Scalar Arguments ..
  270. INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
  271. $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
  272. LOGICAL WANTT, WANTZ
  273. * ..
  274. * .. Array Arguments ..
  275. REAL*8 H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
  276. $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
  277. $ Z( LDZ, * )
  278. * ..
  279. *
  280. * ================================================================
  281. * .. Parameters ..
  282. REAL*8 ZERO, ONE
  283. PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
  284. * ..
  285. * .. Local Scalars ..
  286. REAL*8 ALPHA, BETA, H11, H12, H21, H22, REFSUM,
  287. $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
  288. $ ULP
  289. INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
  290. $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
  291. $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
  292. $ NS, NU
  293. LOGICAL ACCUM, BLK22, BMP22
  294. * ..
  295. * .. External Functions ..
  296. REAL*8 DLAMCH
  297. * EXTERNAL DLAMCH
  298. * ..
  299. * .. Intrinsic Functions ..
  300. *
  301. * INTRINSIC ABS, DBLE, MAX, MIN, MOD
  302. * ..
  303. * .. Local Arrays ..
  304. REAL*8 VT( 3 )
  305. * ..
  306. * .. External Subroutines ..
  307. * EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET,
  308. * $ DTRMM
  309. * ..
  310. * .. Executable Statements ..
  311. *
  312. * ==== If there are no shifts, then there is nothing to do. ====
  313. *
  314. IF( NSHFTS.LT.2 )
  315. $ RETURN
  316. *
  317. * ==== If the active block is empty or 1-by-1, then there
  318. * . is nothing to do. ====
  319. *
  320. IF( KTOP.GE.KBOT )
  321. $ RETURN
  322. *
  323. * ==== Shuffle shifts into pairs of real shifts and pairs
  324. * . of complex conjugate shifts assuming complex
  325. * . conjugate shifts are already adjacent to one
  326. * . another. ====
  327. *
  328. DO 10 I = 1, NSHFTS - 2, 2
  329. IF( SI( I ).NE.-SI( I+1 ) ) THEN
  330. *
  331. SWAP = SR( I )
  332. SR( I ) = SR( I+1 )
  333. SR( I+1 ) = SR( I+2 )
  334. SR( I+2 ) = SWAP
  335. *
  336. SWAP = SI( I )
  337. SI( I ) = SI( I+1 )
  338. SI( I+1 ) = SI( I+2 )
  339. SI( I+2 ) = SWAP
  340. END IF
  341. 10 CONTINUE
  342. *
  343. * ==== NSHFTS is supposed to be even, but if it is odd,
  344. * . then simply reduce it by one. The shuffle above
  345. * . ensures that the dropped shift is real and that
  346. * . the remaining shifts are paired. ====
  347. *
  348. NS = NSHFTS - MOD( NSHFTS, 2 )
  349. *
  350. * ==== Machine constants for deflation ====
  351. *
  352. SAFMIN = DLAMCH( 'SAFE MINIMUM' )
  353. SAFMAX = ONE / SAFMIN
  354. CALL DLABAD( SAFMIN, SAFMAX )
  355. ULP = DLAMCH( 'PRECISION' )
  356. SMLNUM = SAFMIN*( DBLE( N ) / ULP )
  357. *
  358. * ==== Use accumulated reflections to update far-from-diagonal
  359. * . entries ? ====
  360. *
  361. ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
  362. *
  363. * ==== If so, exploit the 2-by-2 block structure? ====
  364. *
  365. BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
  366. *
  367. * ==== clear trash ====
  368. *
  369. IF( KTOP+2.LE.KBOT )
  370. $ H( KTOP+2, KTOP ) = ZERO
  371. *
  372. * ==== NBMPS = number of 2-shift bulges in the chain ====
  373. *
  374. NBMPS = NS / 2
  375. *
  376. * ==== KDU = width of slab ====
  377. *
  378. KDU = 6*NBMPS - 3
  379. *
  380. * ==== Create and chase chains of NBMPS bulges ====
  381. *
  382. DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
  383. NDCOL = INCOL + KDU
  384. IF( ACCUM )
  385. $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
  386. *
  387. * ==== Near-the-diagonal bulge chase. The following loop
  388. * . performs the near-the-diagonal part of a small bulge
  389. * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
  390. * . chunk extends from column INCOL to column NDCOL
  391. * . (including both column INCOL and column NDCOL). The
  392. * . following loop chases a 3*NBMPS column long chain of
  393. * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
  394. * . may be less than KTOP and and NDCOL may be greater than
  395. * . KBOT indicating phantom columns from which to chase
  396. * . bulges before they are actually introduced or to which
  397. * . to chase bulges beyond column KBOT.) ====
  398. *
  399. DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
  400. *
  401. * ==== Bulges number MTOP to MBOT are active double implicit
  402. * . shift bulges. There may or may not also be small
  403. * . 2-by-2 bulge, if there is room. The inactive bulges
  404. * . (if any) must wait until the active bulges have moved
  405. * . down the diagonal to make room. The phantom matrix
  406. * . paradigm described above helps keep track. ====
  407. *
  408. MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
  409. MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
  410. M22 = MBOT + 1
  411. BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
  412. $ ( KBOT-2 )
  413. *
  414. * ==== Generate reflections to chase the chain right
  415. * . one column. (The minimum value of K is KTOP-1.) ====
  416. *
  417. DO 20 M = MTOP, MBOT
  418. K = KRCOL + 3*( M-1 )
  419. IF( K.EQ.KTOP-1 ) THEN
  420. CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
  421. $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
  422. $ V( 1, M ) )
  423. ALPHA = V( 1, M )
  424. CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
  425. ELSE
  426. BETA = H( K+1, K )
  427. V( 2, M ) = H( K+2, K )
  428. V( 3, M ) = H( K+3, K )
  429. CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
  430. *
  431. * ==== A Bulge may collapse because of vigilant
  432. * . deflation or destructive underflow. In the
  433. * . underflow case, try the two-small-subdiagonals
  434. * . trick to try to reinflate the bulge. ====
  435. *
  436. IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
  437. $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
  438. *
  439. * ==== Typical case: not collapsed (yet). ====
  440. *
  441. H( K+1, K ) = BETA
  442. H( K+2, K ) = ZERO
  443. H( K+3, K ) = ZERO
  444. ELSE
  445. *
  446. * ==== Atypical case: collapsed. Attempt to
  447. * . reintroduce ignoring H(K+1,K) and H(K+2,K).
  448. * . If the fill resulting from the new
  449. * . reflector is too large, then abandon it.
  450. * . Otherwise, use the new one. ====
  451. *
  452. CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
  453. $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
  454. $ VT )
  455. ALPHA = VT( 1 )
  456. CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
  457. REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
  458. $ H( K+2, K ) )
  459. *
  460. IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
  461. $ ABS( REFSUM*VT( 3 ) ).GT.ULP*
  462. $ ( ABS( H( K, K ) )+ABS( H( K+1,
  463. $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
  464. *
  465. * ==== Starting a new bulge here would
  466. * . create non-negligible fill. Use
  467. * . the old one with trepidation. ====
  468. *
  469. H( K+1, K ) = BETA
  470. H( K+2, K ) = ZERO
  471. H( K+3, K ) = ZERO
  472. ELSE
  473. *
  474. * ==== Stating a new bulge here would
  475. * . create only negligible fill.
  476. * . Replace the old reflector with
  477. * . the new one. ====
  478. *
  479. H( K+1, K ) = H( K+1, K ) - REFSUM
  480. H( K+2, K ) = ZERO
  481. H( K+3, K ) = ZERO
  482. V( 1, M ) = VT( 1 )
  483. V( 2, M ) = VT( 2 )
  484. V( 3, M ) = VT( 3 )
  485. END IF
  486. END IF
  487. END IF
  488. 20 CONTINUE
  489. *
  490. * ==== Generate a 2-by-2 reflection, if needed. ====
  491. *
  492. K = KRCOL + 3*( M22-1 )
  493. IF( BMP22 ) THEN
  494. IF( K.EQ.KTOP-1 ) THEN
  495. CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
  496. $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
  497. $ V( 1, M22 ) )
  498. BETA = V( 1, M22 )
  499. CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
  500. ELSE
  501. BETA = H( K+1, K )
  502. V( 2, M22 ) = H( K+2, K )
  503. CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
  504. H( K+1, K ) = BETA
  505. H( K+2, K ) = ZERO
  506. END IF
  507. END IF
  508. *
  509. * ==== Multiply H by reflections from the left ====
  510. *
  511. IF( ACCUM ) THEN
  512. JBOT = MIN( NDCOL, KBOT )
  513. ELSE IF( WANTT ) THEN
  514. JBOT = N
  515. ELSE
  516. JBOT = KBOT
  517. END IF
  518. DO 40 J = MAX( KTOP, KRCOL ), JBOT
  519. MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
  520. DO 30 M = MTOP, MEND
  521. K = KRCOL + 3*( M-1 )
  522. REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
  523. $ H( K+2, J )+V( 3, M )*H( K+3, J ) )
  524. H( K+1, J ) = H( K+1, J ) - REFSUM
  525. H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
  526. H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
  527. 30 CONTINUE
  528. 40 CONTINUE
  529. IF( BMP22 ) THEN
  530. K = KRCOL + 3*( M22-1 )
  531. DO 50 J = MAX( K+1, KTOP ), JBOT
  532. REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
  533. $ H( K+2, J ) )
  534. H( K+1, J ) = H( K+1, J ) - REFSUM
  535. H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
  536. 50 CONTINUE
  537. END IF
  538. *
  539. * ==== Multiply H by reflections from the right.
  540. * . Delay filling in the last row until the
  541. * . vigilant deflation check is complete. ====
  542. *
  543. IF( ACCUM ) THEN
  544. JTOP = MAX( KTOP, INCOL )
  545. ELSE IF( WANTT ) THEN
  546. JTOP = 1
  547. ELSE
  548. JTOP = KTOP
  549. END IF
  550. DO 90 M = MTOP, MBOT
  551. IF( V( 1, M ).NE.ZERO ) THEN
  552. K = KRCOL + 3*( M-1 )
  553. DO 60 J = JTOP, MIN( KBOT, K+3 )
  554. REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
  555. $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
  556. H( J, K+1 ) = H( J, K+1 ) - REFSUM
  557. H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
  558. H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
  559. 60 CONTINUE
  560. *
  561. IF( ACCUM ) THEN
  562. *
  563. * ==== Accumulate U. (If necessary, update Z later
  564. * . with with an efficient matrix-matrix
  565. * . multiply.) ====
  566. *
  567. KMS = K - INCOL
  568. DO 70 J = MAX( 1, KTOP-INCOL ), KDU
  569. REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
  570. $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
  571. U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
  572. U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
  573. U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
  574. 70 CONTINUE
  575. ELSE IF( WANTZ ) THEN
  576. *
  577. * ==== U is not accumulated, so update Z
  578. * . now by multiplying by reflections
  579. * . from the right. ====
  580. *
  581. DO 80 J = ILOZ, IHIZ
  582. REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
  583. $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
  584. Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
  585. Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
  586. Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
  587. 80 CONTINUE
  588. END IF
  589. END IF
  590. 90 CONTINUE
  591. *
  592. * ==== Special case: 2-by-2 reflection (if needed) ====
  593. *
  594. K = KRCOL + 3*( M22-1 )
  595. IF( BMP22 ) THEN
  596. IF ( V( 1, M22 ).NE.ZERO ) THEN
  597. DO 100 J = JTOP, MIN( KBOT, K+3 )
  598. REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
  599. $ H( J, K+2 ) )
  600. H( J, K+1 ) = H( J, K+1 ) - REFSUM
  601. H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
  602. 100 CONTINUE
  603. *
  604. IF( ACCUM ) THEN
  605. KMS = K - INCOL
  606. DO 110 J = MAX( 1, KTOP-INCOL ), KDU
  607. REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
  608. $ V( 2, M22 )*U( J, KMS+2 ) )
  609. U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
  610. U( J, KMS+2 ) = U( J, KMS+2 ) -
  611. $ REFSUM*V( 2, M22 )
  612. 110 CONTINUE
  613. ELSE IF( WANTZ ) THEN
  614. DO 120 J = ILOZ, IHIZ
  615. REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
  616. $ Z( J, K+2 ) )
  617. Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
  618. Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
  619. 120 CONTINUE
  620. END IF
  621. END IF
  622. END IF
  623. *
  624. * ==== Vigilant deflation check ====
  625. *
  626. MSTART = MTOP
  627. IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
  628. $ MSTART = MSTART + 1
  629. MEND = MBOT
  630. IF( BMP22 )
  631. $ MEND = MEND + 1
  632. IF( KRCOL.EQ.KBOT-2 )
  633. $ MEND = MEND + 1
  634. DO 130 M = MSTART, MEND
  635. K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
  636. *
  637. * ==== The following convergence test requires that
  638. * . the tradition small-compared-to-nearby-diagonals
  639. * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
  640. * . criteria both be satisfied. The latter improves
  641. * . accuracy in some examples. Falling back on an
  642. * . alternate convergence criterion when TST1 or TST2
  643. * . is zero (as done here) is traditional but probably
  644. * . unnecessary. ====
  645. *
  646. IF( H( K+1, K ).NE.ZERO ) THEN
  647. TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
  648. IF( TST1.EQ.ZERO ) THEN
  649. IF( K.GE.KTOP+1 )
  650. $ TST1 = TST1 + ABS( H( K, K-1 ) )
  651. IF( K.GE.KTOP+2 )
  652. $ TST1 = TST1 + ABS( H( K, K-2 ) )
  653. IF( K.GE.KTOP+3 )
  654. $ TST1 = TST1 + ABS( H( K, K-3 ) )
  655. IF( K.LE.KBOT-2 )
  656. $ TST1 = TST1 + ABS( H( K+2, K+1 ) )
  657. IF( K.LE.KBOT-3 )
  658. $ TST1 = TST1 + ABS( H( K+3, K+1 ) )
  659. IF( K.LE.KBOT-4 )
  660. $ TST1 = TST1 + ABS( H( K+4, K+1 ) )
  661. END IF
  662. IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
  663. $ THEN
  664. H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
  665. H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
  666. H11 = MAX( ABS( H( K+1, K+1 ) ),
  667. $ ABS( H( K, K )-H( K+1, K+1 ) ) )
  668. H22 = MIN( ABS( H( K+1, K+1 ) ),
  669. $ ABS( H( K, K )-H( K+1, K+1 ) ) )
  670. SCL = H11 + H12
  671. TST2 = H22*( H11 / SCL )
  672. *
  673. IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
  674. $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
  675. END IF
  676. END IF
  677. 130 CONTINUE
  678. *
  679. * ==== Fill in the last row of each bulge. ====
  680. *
  681. MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
  682. DO 140 M = MTOP, MEND
  683. K = KRCOL + 3*( M-1 )
  684. REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
  685. H( K+4, K+1 ) = -REFSUM
  686. H( K+4, K+2 ) = -REFSUM*V( 2, M )
  687. H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
  688. 140 CONTINUE
  689. *
  690. * ==== End of near-the-diagonal bulge chase. ====
  691. *
  692. 150 CONTINUE
  693. *
  694. * ==== Use U (if accumulated) to update far-from-diagonal
  695. * . entries in H. If required, use U to update Z as
  696. * . well. ====
  697. *
  698. IF( ACCUM ) THEN
  699. IF( WANTT ) THEN
  700. JTOP = 1
  701. JBOT = N
  702. ELSE
  703. JTOP = KTOP
  704. JBOT = KBOT
  705. END IF
  706. IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
  707. $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
  708. *
  709. * ==== Updates not exploiting the 2-by-2 block
  710. * . structure of U. K1 and NU keep track of
  711. * . the location and size of U in the special
  712. * . cases of introducing bulges and chasing
  713. * . bulges off the bottom. In these special
  714. * . cases and in case the number of shifts
  715. * . is NS = 2, there is no 2-by-2 block
  716. * . structure to exploit. ====
  717. *
  718. K1 = MAX( 1, KTOP-INCOL )
  719. NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
  720. *
  721. * ==== Horizontal Multiply ====
  722. *
  723. DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
  724. JLEN = MIN( NH, JBOT-JCOL+1 )
  725. CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
  726. $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
  727. $ LDWH )
  728. CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH,
  729. $ H( INCOL+K1, JCOL ), LDH )
  730. 160 CONTINUE
  731. *
  732. * ==== Vertical multiply ====
  733. *
  734. DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
  735. JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
  736. CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
  737. $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
  738. $ LDU, ZERO, WV, LDWV )
  739. CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
  740. $ H( JROW, INCOL+K1 ), LDH )
  741. 170 CONTINUE
  742. *
  743. * ==== Z multiply (also vertical) ====
  744. *
  745. IF( WANTZ ) THEN
  746. DO 180 JROW = ILOZ, IHIZ, NV
  747. JLEN = MIN( NV, IHIZ-JROW+1 )
  748. CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
  749. $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
  750. $ LDU, ZERO, WV, LDWV )
  751. CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
  752. $ Z( JROW, INCOL+K1 ), LDZ )
  753. 180 CONTINUE
  754. END IF
  755. ELSE
  756. *
  757. * ==== Updates exploiting U's 2-by-2 block structure.
  758. * . (I2, I4, J2, J4 are the last rows and columns
  759. * . of the blocks.) ====
  760. *
  761. I2 = ( KDU+1 ) / 2
  762. I4 = KDU
  763. J2 = I4 - I2
  764. J4 = KDU
  765. *
  766. * ==== KZS and KNZ deal with the band of zeros
  767. * . along the diagonal of one of the triangular
  768. * . blocks. ====
  769. *
  770. KZS = ( J4-J2 ) - ( NS+1 )
  771. KNZ = NS + 1
  772. *
  773. * ==== Horizontal multiply ====
  774. *
  775. DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
  776. JLEN = MIN( NH, JBOT-JCOL+1 )
  777. *
  778. * ==== Copy bottom of H to top+KZS of scratch ====
  779. * (The first KZS rows get multiplied by zero.) ====
  780. *
  781. CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
  782. $ LDH, WH( KZS+1, 1 ), LDWH )
  783. *
  784. * ==== Multiply by U21**T ====
  785. *
  786. CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
  787. CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
  788. $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
  789. $ LDWH )
  790. *
  791. * ==== Multiply top of H by U11**T ====
  792. *
  793. CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
  794. $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
  795. *
  796. * ==== Copy top of H to bottom of WH ====
  797. *
  798. CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
  799. $ WH( I2+1, 1 ), LDWH )
  800. *
  801. * ==== Multiply by U21**T ====
  802. *
  803. CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
  804. $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
  805. *
  806. * ==== Multiply by U22 ====
  807. *
  808. CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
  809. $ U( J2+1, I2+1 ), LDU,
  810. $ H( INCOL+1+J2, JCOL ), LDH, ONE,
  811. $ WH( I2+1, 1 ), LDWH )
  812. *
  813. * ==== Copy it back ====
  814. *
  815. CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH,
  816. $ H( INCOL+1, JCOL ), LDH )
  817. 190 CONTINUE
  818. *
  819. * ==== Vertical multiply ====
  820. *
  821. DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
  822. JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
  823. *
  824. * ==== Copy right of H to scratch (the first KZS
  825. * . columns get multiplied by zero) ====
  826. *
  827. CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
  828. $ LDH, WV( 1, 1+KZS ), LDWV )
  829. *
  830. * ==== Multiply by U21 ====
  831. *
  832. CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
  833. CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
  834. $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
  835. $ LDWV )
  836. *
  837. * ==== Multiply by U11 ====
  838. *
  839. CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
  840. $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
  841. $ LDWV )
  842. *
  843. * ==== Copy left of H to right of scratch ====
  844. *
  845. CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
  846. $ WV( 1, 1+I2 ), LDWV )
  847. *
  848. * ==== Multiply by U21 ====
  849. *
  850. CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
  851. $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
  852. *
  853. * ==== Multiply by U22 ====
  854. *
  855. CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
  856. $ H( JROW, INCOL+1+J2 ), LDH,
  857. $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
  858. $ LDWV )
  859. *
  860. * ==== Copy it back ====
  861. *
  862. CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
  863. $ H( JROW, INCOL+1 ), LDH )
  864. 200 CONTINUE
  865. *
  866. * ==== Multiply Z (also vertical) ====
  867. *
  868. IF( WANTZ ) THEN
  869. DO 210 JROW = ILOZ, IHIZ, NV
  870. JLEN = MIN( NV, IHIZ-JROW+1 )
  871. *
  872. * ==== Copy right of Z to left of scratch (first
  873. * . KZS columns get multiplied by zero) ====
  874. *
  875. CALL DLACPY( 'ALL', JLEN, KNZ,
  876. $ Z( JROW, INCOL+1+J2 ), LDZ,
  877. $ WV( 1, 1+KZS ), LDWV )
  878. *
  879. * ==== Multiply by U12 ====
  880. *
  881. CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
  882. $ LDWV )
  883. CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
  884. $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
  885. $ LDWV )
  886. *
  887. * ==== Multiply by U11 ====
  888. *
  889. CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
  890. $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
  891. $ WV, LDWV )
  892. *
  893. * ==== Copy left of Z to right of scratch ====
  894. *
  895. CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
  896. $ LDZ, WV( 1, 1+I2 ), LDWV )
  897. *
  898. * ==== Multiply by U21 ====
  899. *
  900. CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
  901. $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
  902. $ LDWV )
  903. *
  904. * ==== Multiply by U22 ====
  905. *
  906. CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
  907. $ Z( JROW, INCOL+1+J2 ), LDZ,
  908. $ U( J2+1, I2+1 ), LDU, ONE,
  909. $ WV( 1, 1+I2 ), LDWV )
  910. *
  911. * ==== Copy the result back to Z ====
  912. *
  913. CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
  914. $ Z( JROW, INCOL+1 ), LDZ )
  915. 210 CONTINUE
  916. END IF
  917. END IF
  918. END IF
  919. 220 CONTINUE
  920. *
  921. * ==== End of DLAQR5 ====
  922. *
  923. END
  924.  
  925.  
  926.  

© Cast3M 2003 - Tous droits réservés.
Mentions légales