|
SRC\pdlahqr.f |
|
| #lines: 2132 size: 90 Kb creation: 29/03/2007 01:44:42 last modification: 08/05/2008 18:37:53 attribute: ARCH Find Reload | |
1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285: 286: 287: 288: 289: 290: 291: 292: 293: 294: 295: 296: 297: 298: 299: 300: 301: 302: 303: 304: 305: 306: 307: 308: 309: 310: 311: 312: 313: 314: 315: 316: 317: 318: 319: 320: 321: 322: 323: 324: 325: 326: 327: 328: 329: 330: 331: 332: 333: 334: 335: 336: 337: 338: 339: 340: 341: 342: 343: 344: 345: 346: 347: 348: 349: 350: 351: 352: 353: 354: 355: 356: 357: 358: 359: 360: 361: 362: 363: 364: 365: 366: 367: 368: 369: 370: 371: 372: 373: 374: 375: 376: 377: 378: 379: 380: 381: 382: 383: 384: 385: 386: 387: 388: 389: 390: 391: 392: 393: 394: 395: 396: 397: 398: 399: 400: 401: 402: 403: 404: 405: 406: 407: 408: 409: 410: 411: 412: 413: 414: 415: 416: 417: 418: 419: 420: 421: 422: 423: 424: 425: 426: 427: 428: 429: 430: 431: 432: 433: 434: 435: 436: 437: 438: 439: 440: 441: 442: 443: 444: 445: 446: 447: 448: 449: 450: 451: 452: 453: 454: 455: 456: 457: 458: 459: 460: 461: 462: 463: 464: 465: 466: 467: 468: 469: 470: 471: 472: 473: 474: 475: 476: 477: 478: 479: 480: 481: 482: 483: 484: 485: 486: 487: 488: 489: 490: 491: 492: 493: 494: 495: 496: 497: 498: 499: 500: 501: 502: 503: 504: 505: 506: 507: 508: 509: 510: 511: 512: 513: 514: 515: 516: 517: 518: 519: 520: 521: 522: 523: 524: 525: 526: 527: 528: 529: 530: 531: 532: 533: 534: 535: 536: 537: 538: 539: 540: 541: 542: 543: 544: 545: 546: 547: 548: 549: 550: 551: 552: 553: 554: 555: 556: 557: 558: 559: 560: 561: 562: 563: 564: 565: 566: 567: 568: 569: 570: 571: 572: 573: 574: 575: 576: 577: 578: 579: 580: 581: 582: 583: 584: 585: 586: 587: 588: 589: 590: 591: 592: 593: 594: 595: 596: 597: 598: 599: 600: 601: 602: 603: 604: 605: 606: 607: 608: 609: 610: 611: 612: 613: 614: 615: 616: 617: 618: 619: 620: 621: 622: 623: 624: 625: 626: 627: 628: 629: 630: 631: 632: 633: 634: 635: 636: 637: 638: 639: 640: 641: 642: 643: 644: 645: 646: 647: 648: 649: 650: 651: 652: 653: 654: 655: 656: 657: 658: 659: 660: 661: 662: 663: 664: 665: 666: 667: 668: 669: 670: 671: 672: 673: 674: 675: 676: 677: 678: 679: 680: 681: 682: 683: 684: 685: 686: 687: 688: 689: 690: 691: 692: 693: 694: 695: 696: 697: 698: 699: 700: 701: 702: 703: 704: 705: 706: 707: 708: 709: 710: 711: 712: 713: 714: 715: 716: 717: 718: 719: 720: 721: 722: 723: 724: 725: 726: 727: 728: 729: 730: 731: 732: 733: 734: 735: 736: 737: 738: 739: 740: 741: 742: 743: 744: 745: 746: 747: 748: 749: 750: 751: 752: 753: 754: 755: 756: 757: 758: 759: 760: 761: 762: 763: 764: 765: 766: 767: 768: 769: 770: 771: 772: 773: 774: 775: 776: 777: 778: 779: 780: 781: 782: 783: 784: 785: 786: 787: 788: 789: 790: 791: 792: 793: 794: 795: 796: 797: 798: 799: 800: 801: 802: 803: 804: 805: 806: 807: 808: 809: 810: 811: 812: 813: 814: 815: 816: 817: 818: 819: 820: 821: 822: 823: 824: 825: 826: 827: 828: 829: 830: 831: 832: 833: 834: 835: 836: 837: 838: 839: 840: 841: 842: 843: 844: 845: 846: 847: 848: 849: 850: 851: 852: 853: 854: 855: 856: 857: 858: 859: 860: 861: 862: 863: 864: 865: 866: 867: 868: 869: 870: 871: 872: 873: 874: 875: 876: 877: 878: 879: 880: 881: 882: 883: 884: 885: 886: 887: 888: 889: 890: 891: 892: 893: 894: 895: 896: 897: 898: 899: 900: 901: 902: 903: 904: 905: 906: 907: 908: 909: 910: 911: 912: 913: 914: 915: 916: 917: 918: 919: 920: 921: 922: 923: 924: 925: 926: 927: 928: 929: 930: 931: 932: 933: 934: 935: 936: 937: 938: 939: 940: 941: 942: 943: 944: 945: 946: 947: 948: 949: 950: 951: 952: 953: 954: 955: 956: 957: 958: 959: 960: 961: 962: 963: 964: 965: 966: 967: 968: 969: 970: 971: 972: 973: 974: 975: 976: 977: 978: 979: 980: 981: 982: 983: 984: 985: 986: 987: 988: 989: 990: 991: 992: 993: 994: 995: 996: 997: 998: 999: 1000: 1001: 1002: 1003: 1004: 1005: 1006: 1007: 1008: 1009: 1010: 1011: 1012: 1013: 1014: 1015: 1016: 1017: 1018: 1019: 1020: 1021: 1022: 1023: 1024: 1025: 1026: 1027: 1028: 1029: 1030: 1031: 1032: 1033: 1034: 1035: 1036: 1037: 1038: 1039: 1040: 1041: 1042: 1043: 1044: 1045: 1046: 1047: 1048: 1049: 1050: 1051: 1052: 1053: 1054: 1055: 1056: 1057: 1058: 1059: 1060: 1061: 1062: 1063: 1064: 1065: 1066: 1067: 1068: 1069: 1070: 1071: 1072: 1073: 1074: 1075: 1076: 1077: 1078: 1079: 1080: 1081: 1082: 1083: 1084: 1085: 1086: 1087: 1088: 1089: 1090: 1091: 1092: 1093: 1094: 1095: 1096: 1097: 1098: 1099: 1100: 1101: 1102: 1103: 1104: 1105: 1106: 1107: 1108: 1109: 1110: 1111: 1112: 1113: 1114: 1115: 1116: 1117: 1118: 1119: 1120: 1121: 1122: 1123: 1124: 1125: 1126: 1127: 1128: 1129: 1130: 1131: 1132: 1133: 1134: 1135: 1136: 1137: 1138: 1139: 1140: 1141: 1142: 1143: 1144: 1145: 1146: 1147: 1148: 1149: 1150: 1151: 1152: 1153: 1154: 1155: 1156: 1157: 1158: 1159: 1160: 1161: 1162: 1163: 1164: 1165: 1166: 1167: 1168: 1169: 1170: 1171: 1172: 1173: 1174: 1175: 1176: 1177: 1178: 1179: 1180: 1181: 1182: 1183: 1184: 1185: 1186: 1187: 1188: 1189: 1190: 1191: 1192: 1193: 1194: 1195: 1196: 1197: 1198: 1199: 1200: 1201: 1202: 1203: 1204: 1205: 1206: 1207: 1208: 1209: 1210: 1211: 1212: 1213: 1214: 1215: 1216: 1217: 1218: 1219: 1220: 1221: 1222: 1223: 1224: 1225: 1226: 1227: 1228: 1229: 1230: 1231: 1232: 1233: 1234: 1235: 1236: 1237: 1238: 1239: 1240: 1241: 1242: 1243: 1244: 1245: 1246: 1247: 1248: 1249: 1250: 1251: 1252: 1253: 1254: 1255: 1256: 1257: 1258: 1259: 1260: 1261: 1262: 1263: 1264: 1265: 1266: 1267: 1268: 1269: 1270: 1271: 1272: 1273: 1274: 1275: 1276: 1277: 1278: 1279: 1280: 1281: 1282: 1283: 1284: 1285: 1286: 1287: 1288: 1289: 1290: 1291: 1292: 1293: 1294: 1295: 1296: 1297: 1298: 1299: 1300: 1301: 1302: 1303: 1304: 1305: 1306: 1307: 1308: 1309: 1310: 1311: 1312: 1313: 1314: 1315: 1316: 1317: 1318: 1319: 1320: 1321: 1322: 1323: 1324: 1325: 1326: 1327: 1328: 1329: 1330: 1331: 1332: 1333: 1334: 1335: 1336: 1337: 1338: 1339: 1340: 1341: 1342: 1343: 1344: 1345: 1346: 1347: 1348: 1349: 1350: 1351: 1352: 1353: 1354: 1355: 1356: 1357: 1358: 1359: 1360: 1361: 1362: 1363: 1364: 1365: 1366: 1367: 1368: 1369: 1370: 1371: 1372: 1373: 1374: 1375: 1376: 1377: 1378: 1379: 1380: 1381: 1382: 1383: 1384: 1385: 1386: 1387: 1388: 1389: 1390: 1391: 1392: 1393: 1394: 1395: 1396: 1397: 1398: 1399: 1400: 1401: 1402: 1403: 1404: 1405: 1406: 1407: 1408: 1409: 1410: 1411: 1412: 1413: 1414: 1415: 1416: 1417: 1418: 1419: 1420: 1421: 1422: 1423: 1424: 1425: 1426: 1427: 1428: 1429: 1430: 1431: 1432: 1433: 1434: 1435: 1436: 1437: 1438: 1439: 1440: 1441: 1442: 1443: 1444: 1445: 1446: 1447: 1448: 1449: 1450: 1451: 1452: 1453: 1454: 1455: 1456: 1457: 1458: 1459: 1460: 1461: 1462: 1463: 1464: 1465: 1466: 1467: 1468: 1469: 1470: 1471: 1472: 1473: 1474: 1475: 1476: 1477: 1478: 1479: 1480: 1481: 1482: 1483: 1484: 1485: 1486: 1487: 1488: 1489: 1490: 1491: 1492: 1493: 1494: 1495: 1496: 1497: 1498: 1499: 1500: 1501: 1502: 1503: 1504: 1505: 1506: 1507: 1508: 1509: 1510: 1511: 1512: 1513: 1514: 1515: 1516: 1517: 1518: 1519: 1520: 1521: 1522: 1523: 1524: 1525: 1526: 1527: 1528: 1529: 1530: 1531: 1532: 1533: 1534: 1535: 1536: 1537: 1538: 1539: 1540: 1541: 1542: 1543: 1544: 1545: 1546: 1547: 1548: 1549: 1550: 1551: 1552: 1553: 1554: 1555: 1556: 1557: 1558: 1559: 1560: 1561: 1562: 1563: 1564: 1565: 1566: 1567: 1568: 1569: 1570: 1571: 1572: 1573: 1574: 1575: 1576: 1577: 1578: 1579: 1580: 1581: 1582: 1583: 1584: 1585: 1586: 1587: 1588: 1589: 1590: 1591: 1592: 1593: 1594: 1595: 1596: 1597: 1598: 1599: 1600: 1601: 1602: 1603: 1604: 1605: 1606: 1607: 1608: 1609: 1610: 1611: 1612: 1613: 1614: 1615: 1616: 1617: 1618: 1619: 1620: 1621: 1622: 1623: 1624: 1625: 1626: 1627: 1628: 1629: 1630: 1631: 1632: 1633: 1634: 1635: 1636: 1637: 1638: 1639: 1640: 1641: 1642: 1643: 1644: 1645: 1646: 1647: 1648: 1649: 1650: 1651: 1652: 1653: 1654: 1655: 1656: 1657: 1658: 1659: 1660: 1661: 1662: 1663: 1664: 1665: 1666: 1667: 1668: 1669: 1670: 1671: 1672: 1673: 1674: 1675: 1676: 1677: 1678: 1679: 1680: 1681: 1682: 1683: 1684: 1685: 1686: 1687: 1688: 1689: 1690: 1691: 1692: 1693: 1694: 1695: 1696: 1697: 1698: 1699: 1700: 1701: 1702: 1703: 1704: 1705: 1706: 1707: 1708: 1709: 1710: 1711: 1712: 1713: 1714: 1715: 1716: 1717: 1718: 1719: 1720: 1721: 1722: 1723: 1724: 1725: 1726: 1727: 1728: 1729: 1730: 1731: 1732: 1733: 1734: 1735: 1736: 1737: 1738: 1739: 1740: 1741: 1742: 1743: 1744: 1745: 1746: 1747: 1748: 1749: 1750: 1751: 1752: 1753: 1754: 1755: 1756: 1757: 1758: 1759: 1760: 1761: 1762: 1763: 1764: 1765: 1766: 1767: 1768: 1769: 1770: 1771: 1772: 1773: 1774: 1775: 1776: 1777: 1778: 1779: 1780: 1781: 1782: 1783: 1784: 1785: 1786: 1787: 1788: 1789: 1790: 1791: 1792: 1793: 1794: 1795: 1796: 1797: 1798: 1799: 1800: 1801: 1802: 1803: 1804: 1805: 1806: 1807: 1808: 1809: 1810: 1811: 1812: 1813: 1814: 1815: 1816: 1817: 1818: 1819: 1820: 1821: 1822: 1823: 1824: 1825: 1826: 1827: 1828: 1829: 1830: 1831: 1832: 1833: 1834: 1835: 1836: 1837: 1838: 1839: 1840: 1841: 1842: 1843: 1844: 1845: 1846: 1847: 1848: 1849: 1850: 1851: 1852: 1853: 1854: 1855: 1856: 1857: 1858: 1859: 1860: 1861: 1862: 1863: 1864: 1865: 1866: 1867: 1868: 1869: 1870: 1871: 1872: 1873: 1874: 1875: 1876: 1877: 1878: 1879: 1880: 1881: 1882: 1883: 1884: 1885: 1886: 1887: 1888: 1889: 1890: 1891: 1892: 1893: 1894: 1895: 1896: 1897: 1898: 1899: 1900: 1901: 1902: 1903: 1904: 1905: 1906: 1907: 1908: 1909: 1910: 1911: 1912: 1913: 1914: 1915: 1916: 1917: 1918: 1919: 1920: 1921: 1922: 1923: 1924: 1925: 1926: 1927: 1928: 1929: 1930: 1931: 1932: 1933: 1934: 1935: 1936: 1937: 1938: 1939: 1940: 1941: 1942: 1943: 1944: 1945: 1946: 1947: 1948: 1949: 1950: 1951: 1952: 1953: 1954: 1955: 1956: 1957: 1958: 1959: 1960: 1961: 1962: 1963: 1964: 1965: 1966: 1967: 1968: 1969: 1970: 1971: 1972: 1973: 1974: 1975: 1976: 1977: 1978: 1979: 1980: 1981: 1982: 1983: 1984: 1985: 1986: 1987: 1988: 1989: 1990: 1991: 1992: 1993: 1994: 1995: 1996: 1997: 1998: 1999: 2000: 2001: 2002: 2003: 2004: 2005: 2006: 2007: 2008: 2009: 2010: 2011: 2012: 2013: 2014: 2015: 2016: 2017: 2018: 2019: 2020: 2021: 2022: 2023: 2024: 2025: 2026: 2027: 2028: 2029: 2030: 2031: 2032: 2033: 2034: 2035: 2036: 2037: 2038: 2039: 2040: 2041: 2042: 2043: 2044: 2045: 2046: 2047: 2048: 2049: 2050: 2051: 2052: 2053: 2054: 2055: 2056: 2057: 2058: 2059: 2060: 2061: 2062: 2063: 2064: 2065: 2066: 2067: 2068: 2069: 2070: 2071: 2072: 2073: 2074: 2075: 2076: 2077: 2078: 2079: 2080: 2081: 2082: 2083: 2084: 2085: 2086: 2087: 2088: 2089: 2090: 2091: 2092: 2093: 2094: 2095: 2096: 2097: 2098: 2099: 2100: 2101: 2102: 2103: 2104: 2105: 2106: 2107: 2108: 2109: 2110: 2111: 2112: 2113: 2114: 2115: 2116: 2117: 2118: 2119: 2120: 2121: 2122: 2123: 2124: 2125: 2126: 2127: 2128: 2129: 2130: 2131: 2132: |
SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI,
$ ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK,
$ ILWORK, INFO )
*
* -- ScaLAPACK routine (version 1.7.3) --
* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
* and University of California, Berkeley.
* 1.7.3: March 22, 2006
* modification suggested by Mark Fahey and Greg Henry
* 1.7.1: January 30, 2006
* 1.7.0: December 31, 1998
*
* .. Scalar Arguments ..
LOGICAL WANTT, WANTZ
INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N
* ..
* .. Array Arguments ..
INTEGER DESCA( * ), DESCZ( * ), IWORK( * )
DOUBLE PRECISION A( * ), WI( * ), WORK( * ), WR( * ), Z( * )
* ..
*
* Purpose
* =======
*
* PDLAHQR is an auxiliary routine used to find the Schur decomposition
* and or eigenvalues of a matrix already in Hessenberg form from
* cols ILO to IHI.
*
* Notes
* =====
*
* Each global data object is described by an associated description
* vector. This vector stores the information required to establish
* the mapping between an object element and its corresponding process
* and memory location.
*
* Let A be a generic term for any 2D block cyclicly distributed array.
* Such a global array has an associated description vector DESCA.
* In the following comments, the character _ should be read as
* "of the global array".
*
* NOTATION STORED IN EXPLANATION
* --------------- -------------- --------------------------------------
* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
* DTYPE_A = 1.
* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
* the BLACS process grid A is distribu-
* ted over. The context itself is glo-
* bal, but the handle (the integer
* value) may vary.
* M_A (global) DESCA( M_ ) The number of rows in the global
* array A.
* N_A (global) DESCA( N_ ) The number of columns in the global
* array A.
* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
* the rows of the array.
* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
* the columns of the array.
* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
* row of the array A is distributed.
* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
* first column of the array A is
* distributed.
* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
* array. LLD_A >= MAX(1,LOCr(M_A)).
*
* Let K be the number of rows or columns of a distributed matrix,
* and assume that its process grid has dimension p x q.
* LOCr( K ) denotes the number of elements of K that a process
* would receive if K were distributed over the p processes of its
* process column.
* Similarly, LOCc( K ) denotes the number of elements of K that a
* process would receive if K were distributed over the q processes of
* its process row.
* The values of LOCr() and LOCc() may be determined via a call to the
* ScaLAPACK tool function, NUMROC:
* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
* An upper bound for these quantities may be computed by:
* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
* Arguments
* =========
*
* WANTT (global input) LOGICAL
* = .TRUE. : the full Schur form T is required;
* = .FALSE.: only eigenvalues are required.
*
* WANTZ (global input) LOGICAL
* = .TRUE. : the matrix of Schur vectors Z is required;
* = .FALSE.: Schur vectors are not required.
*
* N (global input) INTEGER
* The order of the Hessenberg matrix A (and Z if WANTZ).
* N >= 0.
*
* ILO (global input) INTEGER
* IHI (global input) INTEGER
* It is assumed that A is already upper quasi-triangular in
* rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
* ILO = 1). PDLAHQR works primarily with the Hessenberg
* submatrix in rows and columns ILO to IHI, but applies
* transformations to all of H if WANTT is .TRUE..
* 1 <= ILO <= max(1,IHI); IHI <= N.
*
* A (global input/output) DOUBLE PRECISION array, dimension
* (DESCA(LLD_),*)
* On entry, the upper Hessenberg matrix A.
* On exit, if WANTT is .TRUE., A is upper quasi-triangular in
* rows and columns ILO:IHI, with any 2-by-2 or larger diagonal
* blocks not yet in standard form. If WANTT is .FALSE., the
* contents of A are unspecified on exit.
*
* DESCA (global and local input) INTEGER array of dimension DLEN_.
* The array descriptor for the distributed matrix A.
*
* WR (global replicated output) DOUBLE PRECISION array,
* dimension (N)
* WI (global replicated output) DOUBLE PRECISION array,
* dimension (N)
* The real and imaginary parts, respectively, of the computed
* eigenvalues ILO to IHI are stored in the corresponding
* elements of WR and WI. If two eigenvalues are computed as a
* complex conjugate pair, they are stored in consecutive
* elements of WR and WI, say the i-th and (i+1)th, with
* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
* eigenvalues are stored in the same order as on the diagonal
* of the Schur form returned in A. A may be returned with
* larger diagonal blocks until the next release.
*
* ILOZ (global input) INTEGER
* IHIZ (global input) INTEGER
* Specify the rows of Z to which transformations must be
* applied if WANTZ is .TRUE..
* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
*
* Z (global input/output) DOUBLE PRECISION array.
* If WANTZ is .TRUE., on entry Z must contain the current
* matrix Z of transformations accumulated by PDHSEQR, and on
* exit Z has been updated; transformations are applied only to
* the submatrix Z(ILOZ:IHIZ,ILO:IHI).
* If WANTZ is .FALSE., Z is not referenced.
*
* DESCZ (global and local input) INTEGER array of dimension DLEN_.
* The array descriptor for the distributed matrix Z.
*
* WORK (local output) DOUBLE PRECISION array of size LWORK
*
* LWORK (local input) INTEGER
* WORK(LWORK) is a local array and LWORK is assumed big enough
* so that LWORK >= 3*N +
* MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCc(N),
* 7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) )
*
* IWORK (global and local input) INTEGER array of size ILWORK
*
* ILWORK (local input) INTEGER
* This holds the some of the IBLK integer arrays. This is held
* as a place holder for the next release.
*
* INFO (global output) INTEGER
* < 0: parameter number -INFO incorrect or inconsistent
* = 0: successful exit
* > 0: PDLAHQR failed to compute all the eigenvalues ILO to IHI
* in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
* elements i+1:ihi of WR and WI contain those eigenvalues
* which have been successfully computed.
*
* Logic:
* This algorithm is very similar to _LAHQR. Unlike _LAHQR,
* instead of sending one double shift through the largest
* unreduced submatrix, this algorithm sends multiple double shifts
* and spaces them apart so that there can be parallelism across
* several processor row/columns. Another critical difference is
* that this algorithm aggregrates multiple transforms together in
* order to apply them in a block fashion.
*
* Important Local Variables:
* IBLK = The maximum number of bulges that can be computed.
* Currently fixed. Future releases this won't be fixed.
* HBL = The square block size (HBL=DESCA(MB_)=DESCA(NB_))
* ROTN = The number of transforms to block together
* NBULGE = The number of bulges that will be attempted on the
* current submatrix.
* IBULGE = The current number of bulges started.
* K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).
*
* Subroutines:
* This routine calls:
* PDLACONSB -> To determine where to start each iteration
* PDLAWIL -> Given the shift, get the transformation
* DLASORTE -> Pair up eigenvalues so that reals are paired.
* PDLACP3 -> Parallel array to local replicated array copy &
* back.
* DLAREF -> Row/column reflector applier. Core routine
* here.
* PDLASMSUB -> Finds negligible subdiagonal elements.
*
* Current Notes and/or Restrictions:
* 1.) This code requires the distributed block size to be square
* and at least six (6); unlike simpler codes like LU, this
* algorithm is extremely sensitive to block size. Unwise
* choices of too small a block size can lead to bad
* performance.
* 2.) This code requires A and Z to be distributed identically
* and have identical contxts.
* 3.) This release currently does not have a routine for
* resolving the Schur blocks into regular 2x2 form after
* this code is completed. Because of this, a significant
* performance impact is required while the deflation is done
* by sometimes a single column of processors.
* 4.) This code does not currently block the initial transforms
* so that none of the rows or columns for any bulge are
* completed until all are started. To offset pipeline
* start-up it is recommended that at least 2*LCM(NPROW,NPCOL)
* bulges are used (if possible)
* 5.) The maximum number of bulges currently supported is fixed at
* 32. In future versions this will be limited only by the
* incoming WORK array.
* 6.) The matrix A must be in upper Hessenberg form. If elements
* below the subdiagonal are nonzero, the resulting transforms
* may be nonsimilar. This is also true with the LAPACK
* routine.
* 7.) For this release, it is assumed RSRC_=CSRC_=0
* 8.) Currently, all the eigenvalues are distributed to all the
* nodes. Future releases will probably distribute the
* eigenvalues by the column partitioning.
* 9.) The internals of this routine are subject to change.
*
* Implemented by: G. Henry, November 17, 1996
*
* =====================================================================
*
* .. Parameters ..
INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
$ LLD_, MB_, M_, NB_, N_, RSRC_
PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
$ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
$ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
DOUBLE PRECISION ZERO, ONE, HALF
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D+0 )
DOUBLE PRECISION CONST
PARAMETER ( CONST = 1.50D+0 )
INTEGER IBLK
PARAMETER ( IBLK = 32 )
* ..
* .. Local Scalars ..
INTEGER CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE,
$ ICBUF, ICOL, ICOL1, ICOL2, IDIA, IERR, II,
$ IRBUF, IROW, IROW1, IROW2, ISPEC, ISTART,
$ ISTARTCOL, ISTARTROW, ISTOP, ISUB, ISUP,
$ ITERMAX, ITMP1, ITMP2, ITN, ITS, J, JAFIRST,
$ JBLK, JJ, K, KI, L, LCMRC, LDA, LDZ, LEFT,
$ LIHIH, LIHIZ, LILOH, LILOZ, LOCALI1, LOCALI2,
$ LOCALK, LOCALM, M, MODKM1, MYCOL, MYROW,
$ NBULGE, NH, NODE, NPCOL, NPROW, NR, NUM, NZ,
$ RIGHT, ROTN, UP, VECSIDX
DOUBLE PRECISION AVE, DISC, H00, H10, H11, H12, H21, H22, H33,
$ H43H34, H44, OVFL, S, SMLNUM, SUM, T1, T1COPY,
$ T2, T3, ULP, UNFL, V1SAVE, V2, V2SAVE, V3,
$ V3SAVE
* ..
* .. Local Arrays ..
INTEGER ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ),
$ K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ),
$ KP2ROW( IBLK ), KROW( IBLK ), LOCALK2( IBLK )
DOUBLE PRECISION S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ),
$ VCOPY( 3 )
* ..
* .. External Functions ..
INTEGER ILCM, NUMROC
DOUBLE PRECISION PDLAMCH
EXTERNAL ILCM, NUMROC, PDLAMCH
* ..
* .. External Subroutines ..
EXTERNAL BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D,
$ DGERV2D, DGESD2D, DGSUM2D, DLAHQR, DLAREF,
$ DLARFG, DLASORTE, IGAMN2D, INFOG1L, INFOG2L,
$ PDLABAD, PDLACONSB, PDLACP3, PDLASMSUB,
$ PDLAWIL, PXERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, MOD, SIGN, SQRT
* ..
* .. Executable Statements ..
*
INFO = 0
*
ITERMAX = 30*( IHI-ILO+1 )
* ITERMAX = 0
IF( N.EQ.0 )
$ RETURN
*
* NODE (IAFIRST,JAFIRST) OWNS A(1,1)
*
HBL = DESCA( MB_ )
CONTXT = DESCA( CTXT_ )
LDA = DESCA( LLD_ )
IAFIRST = DESCA( RSRC_ )
JAFIRST = DESCA( CSRC_ )
LDZ = DESCZ( LLD_ )
CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
NODE = MYROW*NPCOL + MYCOL
NUM = NPROW*NPCOL
LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
RIGHT = MOD( MYCOL+1, NPCOL )
UP = MOD( MYROW+NPROW-1, NPROW )
DOWN = MOD( MYROW+1, NPROW )
LCMRC = ILCM( NPROW, NPCOL )
*
* Determine the number of columns we have so we can check workspace
*
LOCALK = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
JJ = N / HBL
IF( JJ*HBL.LT.N )
$ JJ = JJ + 1
JJ = 7*JJ / LCMRC
IF( LWORK.LT.3*N+MAX( 2*MAX( LDA, LDZ )+2*LOCALK, JJ ) ) THEN
INFO = -15
END IF
IF( DESCZ( CTXT_ ).NE.DESCA( CTXT_ ) ) THEN
INFO = -( 1300+CTXT_ )
END IF
IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
INFO = -( 700+NB_ )
END IF
IF( DESCZ( MB_ ).NE.DESCZ( NB_ ) ) THEN
INFO = -( 1300+NB_ )
END IF
IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN
INFO = -( 1300+MB_ )
END IF
IF( ( DESCA( RSRC_ ).NE.0 ) .OR. ( DESCA( CSRC_ ).NE.0 ) ) THEN
INFO = -( 700+RSRC_ )
END IF
IF( ( DESCZ( RSRC_ ).NE.0 ) .OR. ( DESCZ( CSRC_ ).NE.0 ) ) THEN
INFO = -( 1300+RSRC_ )
END IF
IF( ( ILO.GT.N ) .OR. ( ILO.LT.1 ) ) THEN
INFO = -4
END IF
IF( ( IHI.GT.N ) .OR. ( IHI.LT.1 ) ) THEN
INFO = -5
END IF
IF( HBL.LT.5 ) THEN
INFO = -( 700+MB_ )
END IF
CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1,
$ -1, -1 )
IF( INFO.LT.0 ) THEN
CALL PXERBLA( CONTXT, 'PDLAHQR', -INFO )
RETURN
END IF
*
* Set work array indices
*
VECSIDX = 0
IDIA = 3*N
ISUB = 3*N
ISUP = 3*N
IRBUF = 3*N
ICBUF = 3*N
*
* Find a value for ROTN
*
ROTN = HBL / 3
ROTN = MAX( ROTN, HBL-2 )
ROTN = MIN( ROTN, 1 )
*
IF( ILO.EQ.IHI ) THEN
CALL INFOG2L( ILO, ILO, DESCA, NPROW, NPCOL, MYROW, MYCOL,
$ IROW, ICOL, II, JJ )
IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
WR( ILO ) = A( ( ICOL-1 )*LDA+IROW )
ELSE
WR( ILO ) = ZERO
END IF
WI( ILO ) = ZERO
RETURN
END IF
*
NH = IHI - ILO + 1
NZ = IHIZ - ILOZ + 1
*
CALL INFOG1L( ILOZ, HBL, NPROW, MYROW, 0, LILOZ, LIHIZ )
LIHIZ = NUMROC( IHIZ, HBL, MYROW, 0, NPROW )
*
* Set machine-dependent constants for the stopping criterion.
* If NORM(H) <= SQRT(OVFL), overflow should not occur.
*
UNFL = PDLAMCH( CONTXT, 'SAFE MINIMUM' )
OVFL = ONE / UNFL
CALL PDLABAD( CONTXT, UNFL, OVFL )
ULP = PDLAMCH( CONTXT, 'PRECISION' )
SMLNUM = UNFL*( NH / ULP )
*
* I1 and I2 are the indices of the first row and last column of H
* to which transformations must be applied. If eigenvalues only are
* being computed, I1 and I2 are set inside the main loop.
*
IF( WANTT ) THEN
I1 = 1
I2 = N
END IF
*
* ITN is the total number of QR iterations allowed.
*
ITN = ITERMAX
*
* The main loop begins here. I is the loop index and decreases from
* IHI to ILO in steps of our schur block size (<=2*IBLK). Each
* iteration of the loop works with the active submatrix in rows
* and columns L to I. Eigenvalues I+1 to IHI have already
* converged. Either L = ILO or the global A(L,L-1) is negligible
* so that the matrix splits.
*
I = IHI
10 CONTINUE
L = ILO
IF( I.LT.ILO )
$ GO TO 450
*
* Perform QR iterations on rows and columns ILO to I until a
* submatrix of order 1 or 2 splits off at the bottom because a
* subdiagonal element has become negligible.
*
DO 420 ITS = 0, ITN
*
* Look for a single small subdiagonal element.
*
CALL PDLASMSUB( A, DESCA, I, L, K, SMLNUM, WORK( IRBUF+1 ),
$ LWORK-IRBUF )
L = K
*
IF( L.GT.ILO ) THEN
*
* H(L,L-1) is negligible
*
CALL INFOG2L( L, L-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
$ IROW, ICOL, ITMP1, ITMP2 )
IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
A( ( ICOL-1 )*LDA+IROW ) = ZERO
END IF
WORK( ISUB+L-1 ) = ZERO
END IF
*
* Exit from loop if a submatrix of order 1 or 2 has split off.
*
M = L - 10
* IF ( L .GE. I - (2*IBLK-1) )
* IF ( L .GE. I - MAX(2*IBLK-1,HBL) )
IF( L.GE.I-1 )
$ GO TO 430
*
* Now the active submatrix is in rows and columns L to I. If
* eigenvalues only are being computed, only the active submatrix
* need be transformed.
*
IF( .NOT.WANTT ) THEN
I1 = L
I2 = I
END IF
*
* Copy submatrix of size 2*JBLK and prepare to do generalized
* Wilkinson shift or an exceptional shift
*
JBLK = MIN( IBLK, ( ( I-L+1 ) / 2 )-1 )
IF( JBLK.GT.LCMRC ) THEN
*
* Make sure it's divisible by LCM (we want even workloads!)
*
JBLK = JBLK - MOD( JBLK, LCMRC )
END IF
JBLK = MIN( JBLK, 2*LCMRC )
JBLK = MAX( JBLK, 1 )
*
CALL PDLACP3( 2*JBLK, I-2*JBLK+1, A, DESCA, S1, 2*IBLK, -1, -1,
$ 0 )
IF( ITS.EQ.20 .OR. ITS.EQ.40 ) THEN
*
* Exceptional shift.
*
DO 20 II = 2*JBLK, 2, -1
S1( II, II ) = CONST*( ABS( S1( II, II ) )+
$ ABS( S1( II, II-1 ) ) )
S1( II, II-1 ) = ZERO
S1( II-1, II ) = ZERO
20 CONTINUE
S1( 1, 1 ) = CONST*ABS( S1( 1, 1 ) )
ELSE
CALL DLAHQR( .FALSE., .FALSE., 2*JBLK, 1, 2*JBLK, S1,
$ 2*IBLK, WORK( IRBUF+1 ), WORK( ICBUF+1 ), 1,
$ 2*JBLK, Z, LDZ, IERR )
*
* Prepare to use Wilkinson's double shift
*
H44 = S1( 2*JBLK, 2*JBLK )
H33 = S1( 2*JBLK-1, 2*JBLK-1 )
H43H34 = S1( 2*JBLK-1, 2*JBLK )*S1( 2*JBLK, 2*JBLK-1 )
IF( ( JBLK.GT.1 ) .AND. ( ITS.GT.30 ) ) THEN
S = S1( 2*JBLK-1, 2*JBLK-2 )
DISC = ( H33-H44 )*HALF
DISC = DISC*DISC + H43H34
IF( DISC.GT.ZERO ) THEN
*
* Real roots: Use Wilkinson's shift twice
*
DISC = SQRT( DISC )
AVE = HALF*( H33+H44 )
IF( ABS( H33 )-ABS( H44 ).GT.ZERO ) THEN
H33 = H33*H44 - H43H34
H44 = H33 / ( SIGN( DISC, AVE )+AVE )
ELSE
H44 = SIGN( DISC, AVE ) + AVE
END IF
H33 = H44
H43H34 = ZERO
END IF
END IF
END IF
*
* Look for two consecutive small subdiagonal elements:
* PDLACONSB is the routine that does this.
*
CALL PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34,
$ WORK( IRBUF+1 ), LWORK-IRBUF )
*
* Skip small submatrices
*
* IF ( M .GE. I - 5 )
* $ GO TO 80
*
* Double-shift QR step
*
* NBULGE is the number of bulges that will be attempted
*
ISTOP = MIN( M+ROTN-MOD( M, ROTN ), I-2 )
ISTOP = MIN( ISTOP, M+HBL-3-MOD( M-1, HBL ) )
ISTOP = MIN( ISTOP, I2-2 )
ISTOP = MAX( ISTOP, M )
NBULGE = ( I-1-ISTOP ) / HBL
*
* Do not exceed maximum determined.
*
NBULGE = MIN( NBULGE, JBLK )
IF( NBULGE.GT.LCMRC ) THEN
*
* Make sure it's divisible by LCM (we want even workloads!)
*
NBULGE = NBULGE - MOD( NBULGE, LCMRC )
END IF
NBULGE = MAX( NBULGE, 1 )
*
IF( ( ITS.NE.20 ) .AND. ( ITS.NE.40 ) .AND. ( NBULGE.GT.1 ) )
$ THEN
*
* sort the eigenpairs so that they are in twos for double
* shifts. only call if several need sorting
*
CALL DLASORTE( S1( 2*( JBLK-NBULGE )+1,
$ 2*( JBLK-NBULGE )+1 ), 2*IBLK, 2*NBULGE,
$ WORK( IRBUF+1 ), IERR )
END IF
*
* IBULGE is the number of bulges going so far
*
IBULGE = 1
*
* "A" row defs : main row transforms from LOCALK to LOCALI2
*
CALL INFOG1L( M, HBL, NPCOL, MYCOL, 0, ITMP1, LOCALK )
LOCALK = NUMROC( N, HBL, MYCOL, 0, NPCOL )
CALL INFOG1L( 1, HBL, NPCOL, MYCOL, 0, ICOL1, LOCALI2 )
LOCALI2 = NUMROC( I2, HBL, MYCOL, 0, NPCOL )
*
* "A" col defs : main col transforms from LOCALI1 to LOCALM
*
CALL INFOG1L( I1, HBL, NPROW, MYROW, 0, LOCALI1, ICOL1 )
ICOL1 = NUMROC( N, HBL, MYROW, 0, NPROW )
CALL INFOG1L( 1, HBL, NPROW, MYROW, 0, LOCALM, ICOL1 )
ICOL1 = NUMROC( MIN( M+3, I ), HBL, MYROW, 0, NPROW )
*
* Which row & column will start the bulges
*
ISTARTROW = MOD( ( M+1 ) / HBL, NPROW ) + IAFIRST
ISTARTCOL = MOD( ( M+1 ) / HBL, NPCOL ) + JAFIRST
*
CALL INFOG1L( M, HBL, NPROW, MYROW, 0, II, ITMP2 )
ITMP2 = NUMROC( N, HBL, MYROW, 0, NPROW )
CALL INFOG1L( M, HBL, NPCOL, MYCOL, 0, JJ, ITMP2 )
ITMP2 = NUMROC( N, HBL, MYCOL, 0, NPCOL )
CALL INFOG1L( 1, HBL, NPROW, MYROW, 0, ISTOP, KP2ROW( 1 ) )
KP2ROW( 1 ) = NUMROC( M+2, HBL, MYROW, 0, NPROW )
CALL INFOG1L( 1, HBL, NPCOL, MYCOL, 0, ISTOP, KP2COL( 1 ) )
KP2COL( 1 ) = NUMROC( M+2, HBL, MYCOL, 0, NPCOL )
*
* Set all values for bulges. All bulges are stored in
* intermediate steps as loops over KI. Their current "task"
* over the global M to I-1 values is always K1(KI) to K2(KI).
* However, because there are many bulges, K1(KI) & K2(KI) might
* go past that range while later bulges (KI+1,KI+2,etc..) are
* finishing up.
*
* Rules:
* If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2
* If MOD(K1(KI)-1,HBL) = HBL-2 then MOD(K2(KI)-1,HBL)=HBL-2
* If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1
* K2(KI)-K1(KI) <= ROTN
*
* We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit
* it again when MOD(K1(KI)-1,HBL)=HBL-1.
*
DO 30 KI = 1, NBULGE
K1( KI ) = M
ISTOP = MIN( M+ROTN-MOD( M, ROTN ), I-2 )
ISTOP = MIN( ISTOP, M+HBL-3-MOD( M-1, HBL ) )
ISTOP = MIN( ISTOP, I2-2 )
ISTOP = MAX( ISTOP, M )
K2( KI ) = ISTOP
ICURROW( KI ) = ISTARTROW
ICURCOL( KI ) = ISTARTCOL
LOCALK2( KI ) = ITMP1
KROW( KI ) = II
KCOL( KI ) = JJ
IF( KI.GT.1 )
$ KP2ROW( KI ) = KP2ROW( 1 )
IF( KI.GT.1 )
$ KP2COL( KI ) = KP2COL( 1 )
30 CONTINUE
*
* Get first transform on node who owns M+2,M+2
*
DO 31 ITMP1 = 1, 3
VCOPY(ITMP1) = ZERO
31 CONTINUE
ITMP1 = ISTARTROW
ITMP2 = ISTARTCOL
CALL PDLAWIL( ITMP1, ITMP2, M, A, DESCA, H44, H33, H43H34,
$ VCOPY )
V1SAVE = VCOPY( 1 )
V2SAVE = VCOPY( 2 )
V3SAVE = VCOPY( 3 )
IF( K2( IBULGE ).LE.I-1 ) THEN
40 CONTINUE
IF( ( K1( IBULGE ).GE.M+5 ) .AND. ( IBULGE.LT.NBULGE ) )
$ THEN
IF( ( MOD( K2( IBULGE )+2, HBL ).EQ.MOD( K2( IBULGE+1 )+
$ 2, HBL ) ) .AND. ( K1( 1 ).LE.I-1 ) ) THEN
H44 = S1( 2*JBLK-2*IBULGE, 2*JBLK-2*IBULGE )
H33 = S1( 2*JBLK-2*IBULGE-1, 2*JBLK-2*IBULGE-1 )
H43H34 = S1( 2*JBLK-2*IBULGE-1, 2*JBLK-2*IBULGE )*
$ S1( 2*JBLK-2*IBULGE, 2*JBLK-2*IBULGE-1 )
ITMP1 = ISTARTROW
ITMP2 = ISTARTCOL
CALL PDLAWIL( ITMP1, ITMP2, M, A, DESCA, H44, H33,
$ H43H34, VCOPY )
V1SAVE = VCOPY( 1 )
V2SAVE = VCOPY( 2 )
V3SAVE = VCOPY( 3 )
IBULGE = IBULGE + 1
END IF
END IF
*
* When we hit a border, there are row and column transforms that
* overlap over several processors and the code gets very
* "congested." As a remedy, when we first hit a border, a 6x6
* *local* matrix is generated on one node (called SMALLA) and
* work is done on that. At the end of the border, the data is
* passed back and everything stays a lot simpler.
*
DO 80 KI = 1, IBULGE
*
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
K = ISTART
MODKM1 = MOD( K-1, HBL )
IF( ( MODKM1.GE.HBL-2 ) .AND. ( K.LE.I-1 ) ) THEN
DO 81 ITMP1 = 1, 6
DO 82 ITMP2 = 1, 6
SMALLA(ITMP1, ITMP2, KI) = ZERO
82 CONTINUE
81 CONTINUE
IF( ( MODKM1.EQ.HBL-2 ) .AND. ( K.LT.I-1 ) ) THEN
*
* Copy 6 elements from global A(K-1:K+4,K-1:K+4)
*
CALL INFOG2L( K+2, K+2, DESCA, NPROW, NPCOL, MYROW,
$ MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
CALL PDLACP3( MIN( 6, N-K+2 ), K-1, A, DESCA,
$ SMALLA( 1, 1, KI ), 6, ITMP1, ITMP2,
$ 0 )
END IF
IF( MODKM1.EQ.HBL-1 ) THEN
*
* Copy 6 elements from global A(K-2:K+3,K-2:K+3)
*
CALL INFOG2L( K+1, K+1, DESCA, NPROW, NPCOL, MYROW,
$ MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
CALL PDLACP3( MIN( 6, N-K+3 ), K-2, A, DESCA,
$ SMALLA( 1, 1, KI ), 6, ITMP1, ITMP2,
$ 0 )
END IF
END IF
*
* DLAHQR used to have a single row application and a single
* column application to H. Here we do something a little
* more clever. We break each transformation down into 3
* parts:
* 1.) The minimum amount of work it takes to determine
* a group of ROTN transformations (this is on
* the critical path.) (Loops 130-180)
* 2.) The small work it takes so that each of the rows
* and columns is at the same place. For example,
* all ROTN row transforms are all complete
* through some column TMP. (Loops within 190)
* 3.) The majority of the row and column transforms
* are then applied in a block fashion.
* (Loops 290 on.)
*
* Each of these three parts are further subdivided into 3
* parts:
* A.) Work at the start of a border when
* MOD(ISTART-1,HBL) = HBL-2
* B.) Work at the end of a border when
* MOD(ISTART-1,HBL) = HBL-1
* C.) Work in the middle of the block when
* MOD(ISTART-1,HBL) < HBL-2
*
IF( ( MYROW.EQ.ICURROW( KI ) ) .AND.
$ ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
$ ( MODKM1.EQ.HBL-2 ) .AND.
$ ( ISTART.LT.MIN( I-1, ISTOP+1 ) ) ) THEN
K = ISTART
NR = MIN( 3, I-K+1 )
IF( K.GT.M ) THEN
CALL DCOPY( NR, SMALLA( 2, 1, KI ), 1, VCOPY, 1 )
ELSE
VCOPY( 1 ) = V1SAVE
VCOPY( 2 ) = V2SAVE
VCOPY( 3 ) = V3SAVE
END IF
CALL DLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1, T1COPY )
IF( K.GT.M ) THEN
SMALLA( 2, 1, KI ) = VCOPY( 1 )
SMALLA( 3, 1, KI ) = ZERO
IF( K.LT.I-1 )
$ SMALLA( 4, 1, KI ) = ZERO
ELSE IF( M.GT.L ) THEN
SMALLA( 2, 1, KI ) = -SMALLA( 2, 1, KI )
END IF
V2 = VCOPY( 2 )
T2 = T1COPY*V2
WORK( VECSIDX+( K-1 )*3+1 ) = VCOPY( 2 )
WORK( VECSIDX+( K-1 )*3+2 ) = VCOPY( 3 )
WORK( VECSIDX+( K-1 )*3+3 ) = T1COPY
END IF
*
IF( ( MOD( ISTOP-1, HBL ).EQ.HBL-1 ) .AND.
$ ( MYROW.EQ.ICURROW( KI ) ) .AND.
$ ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
$ ( ISTART.LE.MIN( I, ISTOP ) ) ) THEN
K = ISTART
NR = MIN( 3, I-K+1 )
IF( K.GT.M ) THEN
CALL DCOPY( NR, SMALLA( 3, 2, KI ), 1, VCOPY, 1 )
ELSE
VCOPY( 1 ) = V1SAVE
VCOPY( 2 ) = V2SAVE
VCOPY( 3 ) = V3SAVE
END IF
CALL DLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1, T1COPY )
IF( K.GT.M ) THEN
SMALLA( 3, 2, KI ) = VCOPY( 1 )
SMALLA( 4, 2, KI ) = ZERO
IF( K.LT.I-1 )
$ SMALLA( 5, 2, KI ) = ZERO
*
* Set a subdiagonal to zero now if it's possible
*
* H11 = SMALLA(1,1,KI)
* H10 = SMALLA(2,1,KI)
* H22 = SMALLA(2,2,KI)
* IF ( ABS(H10) .LE. MAX(ULP*(ABS(H11)+ABS(H22)),
* $ SMLNUM) ) THEN
* SMALLA(2,1,KI) = ZERO
* WORK(ISUB+K-2) = ZERO
* END IF
ELSE IF( M.GT.L ) THEN
SMALLA( 3, 2, KI ) = -SMALLA( 3, 2, KI )
END IF
V2 = VCOPY( 2 )
T2 = T1COPY*V2
WORK( VECSIDX+( K-1 )*3+1 ) = VCOPY( 2 )
WORK( VECSIDX+( K-1 )*3+2 ) = VCOPY( 3 )
WORK( VECSIDX+( K-1 )*3+3 ) = T1COPY
END IF
*
IF( ( MODKM1.EQ.0 ) .AND. ( ISTART.LE.I-1 ) .AND.
$ ( MYROW.EQ.ICURROW( KI ) ) .AND.
$ ( RIGHT.EQ.ICURCOL( KI ) ) ) THEN
*
* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
IROW1 = KROW( KI )
ICOL1 = LOCALK2( KI )
IF( ISTART.GT.M ) THEN
VCOPY( 1 ) = SMALLA( 4, 3, KI )
VCOPY( 2 ) = SMALLA( 5, 3, KI )
VCOPY( 3 ) = SMALLA( 6, 3, KI )
NR = MIN( 3, I-ISTART+1 )
CALL DLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1,
$ T1COPY )
A( ( ICOL1-2 )*LDA+IROW1 ) = VCOPY( 1 )
A( ( ICOL1-2 )*LDA+IROW1+1 ) = ZERO
IF( ISTART.LT.I-1 ) THEN
A( ( ICOL1-2 )*LDA+IROW1+2 ) = ZERO
END IF
ELSE
IF( M.GT.L ) THEN
A( ( ICOL1-2 )*LDA+IROW1 ) = -A( ( ICOL1-2 )*
$ LDA+IROW1 )
END IF
END IF
END IF
*
IF( ( MYROW.EQ.ICURROW( KI ) ) .AND.
$ ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
$ ( ( ( MODKM1.EQ.HBL-2 ) .AND. ( ISTART.EQ.I-
$ 1 ) ) .OR. ( ( MODKM1.LT.HBL-2 ) .AND. ( ISTART.LE.I-
$ 1 ) ) ) ) THEN
*
* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
IROW1 = KROW( KI )
ICOL1 = LOCALK2( KI )
DO 70 K = ISTART, ISTOP
*
* Create and do these transforms
*
NR = MIN( 3, I-K+1 )
IF( K.GT.M ) THEN
IF( MOD( K-1, HBL ).EQ.0 ) THEN
VCOPY( 1 ) = SMALLA( 4, 3, KI )
VCOPY( 2 ) = SMALLA( 5, 3, KI )
VCOPY( 3 ) = SMALLA( 6, 3, KI )
ELSE
VCOPY( 1 ) = A( ( ICOL1-2 )*LDA+IROW1 )
VCOPY( 2 ) = A( ( ICOL1-2 )*LDA+IROW1+1 )
IF( NR.EQ.3 ) THEN
VCOPY( 3 ) = A( ( ICOL1-2 )*LDA+IROW1+2 )
END IF
END IF
ELSE
VCOPY( 1 ) = V1SAVE
VCOPY( 2 ) = V2SAVE
VCOPY( 3 ) = V3SAVE
END IF
CALL DLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1,
$ T1COPY )
IF( K.GT.M ) THEN
IF( MOD( K-1, HBL ).GT.0 ) THEN
A( ( ICOL1-2 )*LDA+IROW1 ) = VCOPY( 1 )
A( ( ICOL1-2 )*LDA+IROW1+1 ) = ZERO
IF( K.LT.I-1 ) THEN
A( ( ICOL1-2 )*LDA+IROW1+2 ) = ZERO
END IF
*
* Set a subdiagonal to zero now if it's possible
*
* IF ( (IROW1.GT.2) .AND. (ICOL1.GT.2) .AND.
* $ (MOD(K-1,HBL) .GT. 1) ) THEN
* H11 = A((ICOL1-3)*LDA+IROW1-2)
* H10 = A((ICOL1-3)*LDA+IROW1-1)
* H22 = A((ICOL1-2)*LDA+IROW1-1)
* IF ( ABS(H10).LE.MAX(ULP*(ABS(H11)+ABS(H22)),
* $ SMLNUM) ) THEN
* A((ICOL1-3)*LDA+IROW1-1) = ZERO
* END IF
* END IF
END IF
ELSE IF( M.GT.L ) THEN
IF( MOD( K-1, HBL ).GT.0 ) THEN
A( ( ICOL1-2 )*LDA+IROW1 ) = -A( ( ICOL1-2 )*
$ LDA+IROW1 )
END IF
END IF
V2 = VCOPY( 2 )
T2 = T1COPY*V2
WORK( VECSIDX+( K-1 )*3+1 ) = VCOPY( 2 )
WORK( VECSIDX+( K-1 )*3+2 ) = VCOPY( 3 )
WORK( VECSIDX+( K-1 )*3+3 ) = T1COPY
T1 = T1COPY
IF( K.LT.ISTOP ) THEN
*
* Do some work so next step is ready...
*
V3 = VCOPY( 3 )
T3 = T1*V3
DO 50 J = ICOL1, MIN( K2( KI )+1, I-1 ) +
$ ICOL1 - K
SUM = A( ( J-1 )*LDA+IROW1 ) +
$ V2*A( ( J-1 )*LDA+IROW1+1 ) +
$ V3*A( ( J-1 )*LDA+IROW1+2 )
A( ( J-1 )*LDA+IROW1 ) = A( ( J-1 )*LDA+
$ IROW1 ) - SUM*T1
A( ( J-1 )*LDA+IROW1+1 ) = A( ( J-1 )*LDA+
$ IROW1+1 ) - SUM*T2
A( ( J-1 )*LDA+IROW1+2 ) = A( ( J-1 )*LDA+
$ IROW1+2 ) - SUM*T3
50 CONTINUE
ITMP1 = LOCALK2( KI )
DO 60 J = IROW1 + 1, IROW1 + 3
SUM = A( ( ICOL1-1 )*LDA+J ) +
$ V2*A( ICOL1*LDA+J ) +
$ V3*A( ( ICOL1+1 )*LDA+J )
A( ( ICOL1-1 )*LDA+J ) = A( ( ICOL1-1 )*LDA+
$ J ) - SUM*T1
A( ICOL1*LDA+J ) = A( ICOL1*LDA+J ) - SUM*T2
A( ( ICOL1+1 )*LDA+J ) = A( ( ICOL1+1 )*LDA+
$ J ) - SUM*T3
60 CONTINUE
END IF
IROW1 = IROW1 + 1
ICOL1 = ICOL1 + 1
70 CONTINUE
END IF
*
IF( MODKM1.EQ.HBL-2 ) THEN
IF( ( DOWN.EQ.ICURROW( KI ) ) .AND.
$ ( RIGHT.EQ.ICURCOL( KI ) ) .AND. ( NUM.GT.1 ) )
$ THEN
CALL DGERV2D( CONTXT, 3, 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ), 3,
$ DOWN, RIGHT )
END IF
IF( ( MYROW.EQ.ICURROW( KI ) ) .AND.
$ ( MYCOL.EQ.ICURCOL( KI ) ) .AND. ( NUM.GT.1 ) )
$ THEN
CALL DGESD2D( CONTXT, 3, 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ), 3,
$ UP, LEFT )
END IF
IF( ( DOWN.EQ.ICURROW( KI ) ) .AND.
$ ( NPCOL.GT.1 ) .AND. ( ISTART.LE.ISTOP ) ) THEN
JJ = MOD( ICURCOL( KI )+NPCOL-1, NPCOL )
IF( MYCOL.NE.JJ ) THEN
CALL DGEBR2D( CONTXT, 'ROW', ' ',
$ 3*( ISTOP-ISTART+1 ), 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ),
$ 3*( ISTOP-ISTART+1 ), MYROW, JJ )
ELSE
CALL DGEBS2D( CONTXT, 'ROW', ' ',
$ 3*( ISTOP-ISTART+1 ), 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ),
$ 3*( ISTOP-ISTART+1 ) )
END IF
END IF
END IF
*
* Broadcast Householder information from the block
*
IF( ( MYROW.EQ.ICURROW( KI ) ) .AND. ( NPCOL.GT.1 ) .AND.
$ ( ISTART.LE.ISTOP ) ) THEN
IF( MYCOL.NE.ICURCOL( KI ) ) THEN
CALL DGEBR2D( CONTXT, 'ROW', ' ',
$ 3*( ISTOP-ISTART+1 ), 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ),
$ 3*( ISTOP-ISTART+1 ), MYROW,
$ ICURCOL( KI ) )
ELSE
CALL DGEBS2D( CONTXT, 'ROW', ' ',
$ 3*( ISTOP-ISTART+1 ), 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ),
$ 3*( ISTOP-ISTART+1 ) )
END IF
END IF
80 CONTINUE
*
* Now do column transforms and finish work
*
DO 90 KI = 1, IBULGE
*
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
*
IF( MOD( ISTART-1, HBL ).EQ.HBL-2 ) THEN
IF( ( RIGHT.EQ.ICURCOL( KI ) ) .AND.
$ ( NPROW.GT.1 ) .AND. ( ISTART.LE.ISTOP ) ) THEN
JJ = MOD( ICURROW( KI )+NPROW-1, NPROW )
IF( MYROW.NE.JJ ) THEN
CALL DGEBR2D( CONTXT, 'COL', ' ',
$ 3*( ISTOP-ISTART+1 ), 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ),
$ 3*( ISTOP-ISTART+1 ), JJ, MYCOL )
ELSE
CALL DGEBS2D( CONTXT, 'COL', ' ',
$ 3*( ISTOP-ISTART+1 ), 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ),
$ 3*( ISTOP-ISTART+1 ) )
END IF
END IF
END IF
*
IF( ( MYCOL.EQ.ICURCOL( KI ) ) .AND. ( NPROW.GT.1 ) .AND.
$ ( ISTART.LE.ISTOP ) ) THEN
IF( MYROW.NE.ICURROW( KI ) ) THEN
CALL DGEBR2D( CONTXT, 'COL', ' ',
$ 3*( ISTOP-ISTART+1 ), 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ),
$ 3*( ISTOP-ISTART+1 ), ICURROW( KI ),
$ MYCOL )
ELSE
CALL DGEBS2D( CONTXT, 'COL', ' ',
$ 3*( ISTOP-ISTART+1 ), 1,
$ WORK( VECSIDX+( ISTART-1 )*3+1 ),
$ 3*( ISTOP-ISTART+1 ) )
END IF
END IF
90 CONTINUE
*
* Now do make up work to have things in block fashion
*
DO 150 KI = 1, IBULGE
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
*
MODKM1 = MOD( ISTART-1, HBL )
IF( ( MYROW.EQ.ICURROW( KI ) ) .AND.
$ ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
$ ( MODKM1.EQ.HBL-2 ) .AND. ( ISTART.LT.I-1 ) ) THEN
K = ISTART
*
* Catch up on column & border work
*
NR = MIN( 3, I-K+1 )
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
IF( NR.EQ.3 ) THEN
*
* Do some work so next step is ready...
*
* V3 = VCOPY( 3 )
T2 = T1*V2
T3 = T1*V3
ITMP1 = MIN( 6, I2+2-K )
ITMP2 = MAX( I1-K+2, 1 )
DO 100 J = 2, ITMP1
SUM = SMALLA( 2, J, KI ) +
$ V2*SMALLA( 3, J, KI ) +
$ V3*SMALLA( 4, J, KI )
SMALLA( 2, J, KI ) = SMALLA( 2, J, KI ) - SUM*T1
SMALLA( 3, J, KI ) = SMALLA( 3, J, KI ) - SUM*T2
SMALLA( 4, J, KI ) = SMALLA( 4, J, KI ) - SUM*T3
100 CONTINUE
DO 110 J = ITMP2, 5
SUM = SMALLA( J, 2, KI ) +
$ V2*SMALLA( J, 3, KI ) +
$ V3*SMALLA( J, 4, KI )
SMALLA( J, 2, KI ) = SMALLA( J, 2, KI ) - SUM*T1
SMALLA( J, 3, KI ) = SMALLA( J, 3, KI ) - SUM*T2
SMALLA( J, 4, KI ) = SMALLA( J, 4, KI ) - SUM*T3
110 CONTINUE
END IF
END IF
*
IF( ( MOD( ISTART-1, HBL ).EQ.HBL-1 ) .AND.
$ ( ISTART.LE.ISTOP ) .AND.
$ ( MYROW.EQ.ICURROW( KI ) ) .AND.
$ ( MYCOL.EQ.ICURCOL( KI ) ) ) THEN
K = ISTOP
*
* Catch up on column & border work
*
NR = MIN( 3, I-K+1 )
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
IF( NR.EQ.3 ) THEN
*
* Do some work so next step is ready...
*
* V3 = VCOPY( 3 )
T2 = T1*V2
T3 = T1*V3
ITMP1 = MIN( 6, I2-K+3 )
ITMP2 = MAX( I1-K+3, 1 )
DO 120 J = 3, ITMP1
SUM = SMALLA( 3, J, KI ) +
$ V2*SMALLA( 4, J, KI ) +
$ V3*SMALLA( 5, J, KI )
SMALLA( 3, J, KI ) = SMALLA( 3, J, KI ) - SUM*T1
SMALLA( 4, J, KI ) = SMALLA( 4, J, KI ) - SUM*T2
SMALLA( 5, J, KI ) = SMALLA( 5, J, KI ) - SUM*T3
120 CONTINUE
DO 130 J = ITMP2, 6
SUM = SMALLA( J, 3, KI ) +
$ V2*SMALLA( J, 4, KI ) +
$ V3*SMALLA( J, 5, KI )
SMALLA( J, 3, KI ) = SMALLA( J, 3, KI ) - SUM*T1
SMALLA( J, 4, KI ) = SMALLA( J, 4, KI ) - SUM*T2
SMALLA( J, 5, KI ) = SMALLA( J, 5, KI ) - SUM*T3
130 CONTINUE
END IF
END IF
*
MODKM1 = MOD( ISTART-1, HBL )
IF( ( MYROW.EQ.ICURROW( KI ) ) .AND.
$ ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
$ ( ( ( MODKM1.EQ.HBL-2 ) .AND. ( ISTART.EQ.I-
$ 1 ) ) .OR. ( ( MODKM1.LT.HBL-2 ) .AND. ( ISTART.LE.I-
$ 1 ) ) ) ) THEN
*
* (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
IROW1 = KROW( KI )
ICOL1 = LOCALK2( KI )
DO 140 K = ISTART, ISTOP
*
* Catch up on column & border work
*
NR = MIN( 3, I-K+1 )
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
IF( K.LT.ISTOP ) THEN
*
* Do some work so next step is ready...
*
T2 = T1*V2
T3 = T1*V3
CALL DLAREF( 'Col', A, LDA, .FALSE., Z, LDZ,
$ .FALSE., ICOL1, ICOL1, ISTART,
$ ISTOP, MIN( ISTART+1, I )-K+IROW1,
$ IROW1, LILOZ, LIHIZ,
$ WORK( VECSIDX+1 ), V2, V3, T1, T2,
$ T3 )
IROW1 = IROW1 + 1
ICOL1 = ICOL1 + 1
ELSE
IF( ( NR.EQ.3 ) .AND. ( MOD( K-1,
$ HBL ).LT.HBL-2 ) ) THEN
T2 = T1*V2
T3 = T1*V3
CALL DLAREF( 'Row', A, LDA, .FALSE., Z, LDZ,
$ .FALSE., IROW1, IROW1, ISTART,
$ ISTOP, ICOL1, MIN( MIN( K2( KI )
$ +1, I-1 ), I2 )-K+ICOL1, LILOZ,
$ LIHIZ, WORK( VECSIDX+1 ), V2,
$ V3, T1, T2, T3 )
END IF
END IF
140 CONTINUE
END IF
*
* Send SMALLA back again.
*
K = ISTART
MODKM1 = MOD( K-1, HBL )
IF( ( MODKM1.GE.HBL-2 ) .AND. ( K.LE.I-1 ) ) THEN
IF( ( MODKM1.EQ.HBL-2 ) .AND. ( K.LT.I-1 ) ) THEN
*
* Copy 6 elements from global A(K-1:K+4,K-1:K+4)
*
CALL INFOG2L( K+2, K+2, DESCA, NPROW, NPCOL, MYROW,
$ MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
CALL PDLACP3( MIN( 6, N-K+2 ), K-1, A, DESCA,
$ SMALLA( 1, 1, KI ), 6, ITMP1, ITMP2,
$ 1 )
*
END IF
IF( MODKM1.EQ.HBL-1 ) THEN
*
* Copy 6 elements from global A(K-2:K+3,K-2:K+3)
*
CALL INFOG2L( K+1, K+1, DESCA, NPROW, NPCOL, MYROW,
$ MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
CALL PDLACP3( MIN( 6, N-K+3 ), K-2, A, DESCA,
$ SMALLA( 1, 1, KI ), 6, ITMP1, ITMP2,
$ 1 )
END IF
END IF
*
150 CONTINUE
*
* Now start major set of block ROW reflections
*
DO 160 KI = 1, IBULGE
IF( ( MYROW.NE.ICURROW( KI ) ) .AND.
$ ( DOWN.NE.ICURROW( KI ) ) )GO TO 160
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
*
IF( ( ISTOP.GT.ISTART ) .AND.
$ ( MOD( ISTART-1, HBL ).LT.HBL-2 ) .AND.
$ ( ICURROW( KI ).EQ.MYROW ) ) THEN
IROW1 = MIN( K2( KI )+1, I-1 ) + 1
CALL INFOG1L( IROW1, HBL, NPCOL, MYCOL, 0, ITMP1,
$ ITMP2 )
ITMP2 = NUMROC( I2, HBL, MYCOL, 0, NPCOL )
II = KROW( KI )
CALL DLAREF( 'Row', A, LDA, WANTZ, Z, LDZ, .TRUE., II,
$ II, ISTART, ISTOP, ITMP1, ITMP2, LILOZ,
$ LIHIZ, WORK( VECSIDX+1 ), V2, V3, T1, T2,
$ T3 )
END IF
160 CONTINUE
*
DO 180 KI = 1, IBULGE
IF( KROW( KI ).GT.KP2ROW( KI ) )
$ GO TO 180
IF( ( MYROW.NE.ICURROW( KI ) ) .AND.
$ ( DOWN.NE.ICURROW( KI ) ) )GO TO 180
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
IF( ( ISTART.EQ.ISTOP ) .OR.
$ ( MOD( ISTART-1, HBL ).GE.HBL-2 ) .OR.
$ ( ICURROW( KI ).NE.MYROW ) ) THEN
DO 170 K = ISTART, ISTOP
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
NR = MIN( 3, I-K+1 )
IF( ( NR.EQ.3 ) .AND. ( KROW( KI ).LE.
$ KP2ROW( KI ) ) ) THEN
IF( ( K.LT.ISTOP ) .AND.
$ ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
ELSE
IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
END IF
IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
ITMP1 = MIN( K+4, I2 ) + 1
END IF
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
ITMP1 = MIN( K+3, I2 ) + 1
END IF
END IF
*
* Find local coor of rows K through K+2
*
IROW1 = KROW( KI )
IROW2 = KP2ROW( KI )
CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, 0,
$ ICOL1, ICOL2 )
ICOL2 = NUMROC( I2, HBL, MYCOL, 0, NPCOL )
IF( ( MOD( K-1, HBL ).LT.HBL-2 ) .OR.
$ ( NPROW.EQ.1 ) ) THEN
T2 = T1*V2
T3 = T1*V3
CALL DLAREF( 'Row', A, LDA, WANTZ, Z, LDZ,
$ .FALSE., IROW1, IROW1, ISTART,
$ ISTOP, ICOL1, ICOL2, LILOZ,
$ LIHIZ, WORK( VECSIDX+1 ), V2,
$ V3, T1, T2, T3 )
END IF
IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
$ ( NPROW.GT.1 ) ) THEN
IF( IROW1.EQ.IROW2 ) THEN
CALL DGESD2D( CONTXT, 1, ICOL2-ICOL1+1,
$ A( ( ICOL1-1 )*LDA+IROW2 ),
$ LDA, UP, MYCOL )
END IF
END IF
IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
$ ( NPROW.GT.1 ) ) THEN
IF( IROW1.EQ.IROW2 ) THEN
CALL DGESD2D( CONTXT, 1, ICOL2-ICOL1+1,
$ A( ( ICOL1-1 )*LDA+IROW1 ),
$ LDA, DOWN, MYCOL )
END IF
END IF
END IF
170 CONTINUE
END IF
180 CONTINUE
*
DO 220 KI = 1, IBULGE
IF( KROW( KI ).GT.KP2ROW( KI ) )
$ GO TO 220
IF( ( MYROW.NE.ICURROW( KI ) ) .AND.
$ ( DOWN.NE.ICURROW( KI ) ) )GO TO 220
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
IF( ( ISTART.EQ.ISTOP ) .OR.
$ ( MOD( ISTART-1, HBL ).GE.HBL-2 ) .OR.
$ ( ICURROW( KI ).NE.MYROW ) ) THEN
DO 210 K = ISTART, ISTOP
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
NR = MIN( 3, I-K+1 )
IF( ( NR.EQ.3 ) .AND. ( KROW( KI ).LE.
$ KP2ROW( KI ) ) ) THEN
IF( ( K.LT.ISTOP ) .AND.
$ ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
ELSE
IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
END IF
IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
ITMP1 = MIN( K+4, I2 ) + 1
END IF
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
ITMP1 = MIN( K+3, I2 ) + 1
END IF
END IF
*
IROW1 = KROW( KI ) + K - ISTART
IROW2 = KP2ROW( KI ) + K - ISTART
CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, 0,
$ ICOL1, ICOL2 )
ICOL2 = NUMROC( I2, HBL, MYCOL, 0, NPCOL )
IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
$ ( NPROW.GT.1 ) ) THEN
IF( IROW1.NE.IROW2 ) THEN
CALL DGERV2D( CONTXT, 1, ICOL2-ICOL1+1,
$ WORK( IRBUF+1 ), 1, DOWN,
$ MYCOL )
T2 = T1*V2
T3 = T1*V3
DO 190 J = ICOL1, ICOL2
SUM = A( ( J-1 )*LDA+IROW1 ) +
$ V2*A( ( J-1 )*LDA+IROW1+1 ) +
$ V3*WORK( IRBUF+J-ICOL1+1 )
A( ( J-1 )*LDA+IROW1 ) = A( ( J-1 )*
$ LDA+IROW1 ) - SUM*T1
A( ( J-1 )*LDA+IROW1+1 ) = A( ( J-1 )*
$ LDA+IROW1+1 ) - SUM*T2
WORK( IRBUF+J-ICOL1+1 ) = WORK( IRBUF+
$ J-ICOL1+1 ) - SUM*T3
190 CONTINUE
CALL DGESD2D( CONTXT, 1, ICOL2-ICOL1+1,
$ WORK( IRBUF+1 ), 1, DOWN,
$ MYCOL )
END IF
END IF
IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
$ ( NPROW.GT.1 ) ) THEN
IF( IROW1.NE.IROW2 ) THEN
CALL DGERV2D( CONTXT, 1, ICOL2-ICOL1+1,
$ WORK( IRBUF+1 ), 1, UP,
$ MYCOL )
T2 = T1*V2
T3 = T1*V3
DO 200 J = ICOL1, ICOL2
SUM = WORK( IRBUF+J-ICOL1+1 ) +
$ V2*A( ( J-1 )*LDA+IROW1 ) +
$ V3*A( ( J-1 )*LDA+IROW1+1 )
WORK( IRBUF+J-ICOL1+1 ) = WORK( IRBUF+
$ J-ICOL1+1 ) - SUM*T1
A( ( J-1 )*LDA+IROW1 ) = A( ( J-1 )*
$ LDA+IROW1 ) - SUM*T2
A( ( J-1 )*LDA+IROW1+1 ) = A( ( J-1 )*
$ LDA+IROW1+1 ) - SUM*T3
200 CONTINUE
CALL DGESD2D( CONTXT, 1, ICOL2-ICOL1+1,
$ WORK( IRBUF+1 ), 1, UP,
$ MYCOL )
END IF
END IF
END IF
210 CONTINUE
END IF
220 CONTINUE
*
DO 240 KI = 1, IBULGE
IF( KROW( KI ).GT.KP2ROW( KI ) )
$ GO TO 240
IF( ( MYROW.NE.ICURROW( KI ) ) .AND.
$ ( DOWN.NE.ICURROW( KI ) ) )GO TO 240
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
IF( ( ISTART.EQ.ISTOP ) .OR.
$ ( MOD( ISTART-1, HBL ).GE.HBL-2 ) .OR.
$ ( ICURROW( KI ).NE.MYROW ) ) THEN
DO 230 K = ISTART, ISTOP
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
NR = MIN( 3, I-K+1 )
IF( ( NR.EQ.3 ) .AND. ( KROW( KI ).LE.
$ KP2ROW( KI ) ) ) THEN
IF( ( K.LT.ISTOP ) .AND.
$ ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
ELSE
IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
END IF
IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
ITMP1 = MIN( K+4, I2 ) + 1
END IF
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
ITMP1 = MIN( K+3, I2 ) + 1
END IF
END IF
*
IROW1 = KROW( KI ) + K - ISTART
IROW2 = KP2ROW( KI ) + K - ISTART
CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, 0,
$ ICOL1, ICOL2 )
ICOL2 = NUMROC( I2, HBL, MYCOL, 0, NPCOL )
IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
$ ( NPROW.GT.1 ) ) THEN
IF( IROW1.EQ.IROW2 ) THEN
CALL DGERV2D( CONTXT, 1, ICOL2-ICOL1+1,
$ A( ( ICOL1-1 )*LDA+IROW2 ),
$ LDA, UP, MYCOL )
END IF
END IF
IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
$ ( NPROW.GT.1 ) ) THEN
IF( IROW1.EQ.IROW2 ) THEN
CALL DGERV2D( CONTXT, 1, ICOL2-ICOL1+1,
$ A( ( ICOL1-1 )*LDA+IROW1 ),
$ LDA, DOWN, MYCOL )
END IF
END IF
END IF
230 CONTINUE
END IF
240 CONTINUE
250 CONTINUE
*
* Now start major set of block COL reflections
*
DO 260 KI = 1, IBULGE
IF( ( MYCOL.NE.ICURCOL( KI ) ) .AND.
$ ( RIGHT.NE.ICURCOL( KI ) ) )GO TO 260
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
*
IF( ( ( MOD( ISTART-1, HBL ).LT.HBL-2 ) .OR. ( NPCOL.EQ.
$ 1 ) ) .AND. ( ICURCOL( KI ).EQ.MYCOL ) .AND.
$ ( I-ISTOP+1.GE.3 ) ) THEN
K = ISTART
IF( ( K.LT.ISTOP ) .AND. ( MOD( K-1,
$ HBL ).LT.HBL-2 ) ) THEN
ITMP1 = MIN( ISTART+1, I ) - 1
ELSE
IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
ITMP1 = MIN( K+3, I )
END IF
IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
ITMP1 = MAX( I1, K-1 ) - 1
END IF
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
ITMP1 = MAX( I1, K-2 ) - 1
END IF
END IF
*
ICOL1 = KCOL( KI )
CALL INFOG1L( I1, HBL, NPROW, MYROW, 0, IROW1, IROW2 )
IROW2 = NUMROC( ITMP1, HBL, MYROW, 0, NPROW )
IF( IROW1.LE.IROW2 ) THEN
ITMP2 = IROW2
ELSE
ITMP2 = -1
END IF
CALL DLAREF( 'Col', A, LDA, WANTZ, Z, LDZ, .TRUE.,
$ ICOL1, ICOL1, ISTART, ISTOP, IROW1,
$ IROW2, LILOZ, LIHIZ, WORK( VECSIDX+1 ),
$ V2, V3, T1, T2, T3 )
K = ISTOP
IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
*
* Do from ITMP1+1 to MIN(K+3,I)
*
IF( MOD( K-1, HBL ).LT.HBL-3 ) THEN
IROW1 = ITMP2 + 1
IF( MOD( ( ITMP1 / HBL ), NPROW ).EQ.MYROW )
$ THEN
IF( ITMP2.GT.0 ) THEN
IROW2 = ITMP2 + MIN( K+3, I ) - ITMP1
ELSE
IROW2 = IROW1 - 1
END IF
ELSE
IROW2 = IROW1 - 1
END IF
ELSE
CALL INFOG1L( ITMP1+1, HBL, NPROW, MYROW, 0,
$ IROW1, IROW2 )
IROW2 = NUMROC( MIN( K+3, I ), HBL, MYROW, 0,
$ NPROW )
END IF
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
T2 = T1*V2
T3 = T1*V3
ICOL1 = KCOL( KI ) + ISTOP - ISTART
CALL DLAREF( 'Col', A, LDA, .FALSE., Z, LDZ,
$ .FALSE., ICOL1, ICOL1, ISTART, ISTOP,
$ IROW1, IROW2, LILOZ, LIHIZ,
$ WORK( VECSIDX+1 ), V2, V3, T1, T2,
$ T3 )
END IF
END IF
260 CONTINUE
*
DO 320 KI = 1, IBULGE
IF( KCOL( KI ).GT.KP2COL( KI ) )
$ GO TO 320
IF( ( MYCOL.NE.ICURCOL( KI ) ) .AND.
$ ( RIGHT.NE.ICURCOL( KI ) ) )GO TO 320
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
IF( MOD( ISTART-1, HBL ).GE.HBL-2 ) THEN
*
* INFO is found in a buffer
*
ISPEC = 1
ELSE
*
* All INFO is local
*
ISPEC = 0
END IF
*
DO 310 K = ISTART, ISTOP
*
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
NR = MIN( 3, I-K+1 )
IF( ( NR.EQ.3 ) .AND. ( KCOL( KI ).LE.KP2COL( KI ) ) )
$ THEN
*
IF( ( K.LT.ISTOP ) .AND.
$ ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
ITMP1 = MIN( ISTART+1, I ) - 1
ELSE
IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
ITMP1 = MIN( K+3, I )
END IF
IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
ITMP1 = MAX( I1, K-1 ) - 1
END IF
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
ITMP1 = MAX( I1, K-2 ) - 1
END IF
END IF
ICOL1 = KCOL( KI ) + K - ISTART
ICOL2 = KP2COL( KI ) + K - ISTART
CALL INFOG1L( I1, HBL, NPROW, MYROW, 0, IROW1,
$ IROW2 )
IROW2 = NUMROC( ITMP1, HBL, MYROW, 0, NPROW )
IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
$ ( NPCOL.GT.1 ) ) THEN
IF( ICOL1.EQ.ICOL2 ) THEN
CALL DGESD2D( CONTXT, IROW2-IROW1+1, 1,
$ A( ( ICOL1-1 )*LDA+IROW1 ),
$ LDA, MYROW, LEFT )
CALL DGERV2D( CONTXT, IROW2-IROW1+1, 1,
$ A( ( ICOL1-1 )*LDA+IROW1 ),
$ LDA, MYROW, LEFT )
ELSE
CALL DGERV2D( CONTXT, IROW2-IROW1+1, 1,
$ WORK( ICBUF+1 ), IROW2-IROW1+1,
$ MYROW, RIGHT )
T2 = T1*V2
T3 = T1*V3
DO 270 J = IROW1, IROW2
SUM = A( ( ICOL1-1 )*LDA+J ) +
$ V2*A( ICOL1*LDA+J ) +
$ V3*WORK( ICBUF+J-IROW1+1 )
A( ( ICOL1-1 )*LDA+J ) = A( ( ICOL1-1 )*
$ LDA+J ) - SUM*T1
A( ICOL1*LDA+J ) = A( ICOL1*LDA+J ) -
$ SUM*T2
WORK( ICBUF+J-IROW1+1 ) = WORK( ICBUF+J-
$ IROW1+1 ) - SUM*T3
270 CONTINUE
CALL DGESD2D( CONTXT, IROW2-IROW1+1, 1,
$ WORK( ICBUF+1 ), IROW2-IROW1+1,
$ MYROW, RIGHT )
END IF
END IF
IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
$ ( NPCOL.GT.1 ) ) THEN
IF( ICOL1.EQ.ICOL2 ) THEN
CALL DGESD2D( CONTXT, IROW2-IROW1+1, 1,
$ A( ( ICOL1-1 )*LDA+IROW1 ),
$ LDA, MYROW, RIGHT )
CALL DGERV2D( CONTXT, IROW2-IROW1+1, 1,
$ A( ( ICOL1-1 )*LDA+IROW1 ),
$ LDA, MYROW, RIGHT )
ELSE
CALL DGERV2D( CONTXT, IROW2-IROW1+1, 1,
$ WORK( ICBUF+1 ), IROW2-IROW1+1,
$ MYROW, LEFT )
T2 = T1*V2
T3 = T1*V3
DO 280 J = IROW1, IROW2
SUM = WORK( ICBUF+J-IROW1+1 ) +
$ V2*A( ( ICOL1-1 )*LDA+J ) +
$ V3*A( ICOL1*LDA+J )
WORK( ICBUF+J-IROW1+1 ) = WORK( ICBUF+J-
$ IROW1+1 ) - SUM*T1
A( ( ICOL1-1 )*LDA+J ) = A( ( ICOL1-1 )*
$ LDA+J ) - SUM*T2
A( ICOL1*LDA+J ) = A( ICOL1*LDA+J ) -
$ SUM*T3
280 CONTINUE
CALL DGESD2D( CONTXT, IROW2-IROW1+1, 1,
$ WORK( ICBUF+1 ), IROW2-IROW1+1,
$ MYROW, LEFT )
END IF
END IF
*
* If we want Z and we haven't already done any Z
IF( ( WANTZ ) .AND. ( MOD( K-1,
$ HBL ).GE.HBL-2 ) .AND. ( NPCOL.GT.1 ) ) THEN
*
* Accumulate transformations in the matrix Z
*
IROW1 = LILOZ
IROW2 = LIHIZ
IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
IF( ICOL1.EQ.ICOL2 ) THEN
CALL DGESD2D( CONTXT, IROW2-IROW1+1, 1,
$ Z( ( ICOL1-1 )*LDZ+IROW1 ),
$ LDZ, MYROW, LEFT )
CALL DGERV2D( CONTXT, IROW2-IROW1+1, 1,
$ Z( ( ICOL1-1 )*LDZ+IROW1 ),
$ LDZ, MYROW, LEFT )
ELSE
CALL DGERV2D( CONTXT, IROW2-IROW1+1, 1,
$ WORK( ICBUF+1 ),
$ IROW2-IROW1+1, MYROW,
$ RIGHT )
T2 = T1*V2
T3 = T1*V3
ICOL1 = ( ICOL1-1 )*LDZ
DO 290 J = IROW1, IROW2
SUM = Z( ICOL1+J ) +
$ V2*Z( ICOL1+J+LDZ ) +
$ V3*WORK( ICBUF+J-IROW1+1 )
Z( J+ICOL1 ) = Z( J+ICOL1 ) - SUM*T1
Z( J+ICOL1+LDZ ) = Z( J+ICOL1+LDZ ) -
$ SUM*T2
WORK( ICBUF+J-IROW1+1 ) = WORK( ICBUF+
$ J-IROW1+1 ) - SUM*T3
290 CONTINUE
CALL DGESD2D( CONTXT, IROW2-IROW1+1, 1,
$ WORK( ICBUF+1 ),
$ IROW2-IROW1+1, MYROW,
$ RIGHT )
END IF
END IF
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
IF( ICOL1.EQ.ICOL2 ) THEN
CALL DGESD2D( CONTXT, IROW2-IROW1+1, 1,
$ Z( ( ICOL1-1 )*LDZ+IROW1 ),
$ LDZ, MYROW, RIGHT )
CALL DGERV2D( CONTXT, IROW2-IROW1+1, 1,
$ Z( ( ICOL1-1 )*LDZ+IROW1 ),
$ LDZ, MYROW, RIGHT )
ELSE
CALL DGERV2D( CONTXT, IROW2-IROW1+1, 1,
$ WORK( ICBUF+1 ),
$ IROW2-IROW1+1, MYROW, LEFT )
T2 = T1*V2
T3 = T1*V3
ICOL1 = ( ICOL1-1 )*LDZ
DO 300 J = IROW1, IROW2
SUM = WORK( ICBUF+J-IROW1+1 ) +
$ V2*Z( J+ICOL1 ) +
$ V3*Z( J+ICOL1+LDZ )
WORK( ICBUF+J-IROW1+1 ) = WORK( ICBUF+
$ J-IROW1+1 ) - SUM*T1
Z( J+ICOL1 ) = Z( J+ICOL1 ) - SUM*T2
Z( J+ICOL1+LDZ ) = Z( J+ICOL1+LDZ ) -
$ SUM*T3
300 CONTINUE
CALL DGESD2D( CONTXT, IROW2-IROW1+1, 1,
$ WORK( ICBUF+1 ),
$ IROW2-IROW1+1, MYROW, LEFT )
END IF
END IF
END IF
IF( ICURCOL( KI ).EQ.MYCOL ) THEN
IF( ( ISPEC.EQ.0 ) .OR. ( NPCOL.EQ.1 ) ) THEN
LOCALK2( KI ) = LOCALK2( KI ) + 1
END IF
ELSE
IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
$ ( ICURCOL( KI ).EQ.RIGHT ) ) THEN
IF( K.GT.M ) THEN
LOCALK2( KI ) = LOCALK2( KI ) + 2
ELSE
LOCALK2( KI ) = LOCALK2( KI ) + 1
END IF
END IF
IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
$ ( I-K.EQ.2 ) .AND. ( ICURCOL( KI ).EQ.
$ RIGHT ) ) THEN
LOCALK2( KI ) = LOCALK2( KI ) + 2
END IF
END IF
END IF
310 CONTINUE
320 CONTINUE
*
* Column work done
*
330 CONTINUE
*
* Now do NR=2 work
*
DO 410 KI = 1, IBULGE
ISTART = MAX( K1( KI ), M )
ISTOP = MIN( K2( KI ), I-1 )
IF( MOD( ISTART-1, HBL ).GE.HBL-2 ) THEN
*
* INFO is found in a buffer
*
ISPEC = 1
ELSE
*
* All INFO is local
*
ISPEC = 0
END IF
*
DO 400 K = ISTART, ISTOP
*
V2 = WORK( VECSIDX+( K-1 )*3+1 )
V3 = WORK( VECSIDX+( K-1 )*3+2 )
T1 = WORK( VECSIDX+( K-1 )*3+3 )
NR = MIN( 3, I-K+1 )
IF( NR.EQ.2 ) THEN
IF ( ICURROW( KI ).EQ.MYROW ) THEN
T2 = T1*V2
END IF
IF ( ICURCOL( KI ).EQ.MYCOL ) THEN
T2 = T1*V2
END IF
*
* Apply G from the left to transform the rows of the matrix
* in columns K to I2.
*
CALL INFOG1L( K, HBL, NPCOL, MYCOL, 0, LILOH,
$ LIHIH )
LIHIH = NUMROC( I2, HBL, MYCOL, 0, NPCOL )
CALL INFOG1L( 1, HBL, NPROW, MYROW, 0, ITMP2,
$ ITMP1 )
ITMP1 = NUMROC( K+1, HBL, MYROW, 0, NPROW )
IF( ICURROW( KI ).EQ.MYROW ) THEN
IF( ( ISPEC.EQ.0 ) .OR. ( NPROW.EQ.1 ) .OR.
$ ( MOD( K-1, HBL ).EQ.HBL-2 ) ) THEN
ITMP1 = ITMP1 - 1
DO 340 J = ( LILOH-1 )*LDA,
$ ( LIHIH-1 )*LDA, LDA
SUM = A( ITMP1+J ) + V2*A( ITMP1+1+J )
A( ITMP1+J ) = A( ITMP1+J ) - SUM*T1
A( ITMP1+1+J ) = A( ITMP1+1+J ) - SUM*T2
340 CONTINUE
ELSE
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
CALL DGERV2D( CONTXT, 1, LIHIH-LILOH+1,
$ WORK( IRBUF+1 ), 1, UP,
$ MYCOL )
DO 350 J = LILOH, LIHIH
SUM = WORK( IRBUF+J-LILOH+1 ) +
$ V2*A( ( J-1 )*LDA+ITMP1 )
WORK( IRBUF+J-LILOH+1 ) = WORK( IRBUF+
$ J-LILOH+1 ) - SUM*T1
A( ( J-1 )*LDA+ITMP1 ) = A( ( J-1 )*
$ LDA+ITMP1 ) - SUM*T2
350 CONTINUE
CALL DGESD2D( CONTXT, 1, LIHIH-LILOH+1,
$ WORK( IRBUF+1 ), 1, UP,
$ MYCOL )
END IF
END IF
ELSE
IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
$ ( ICURROW( KI ).EQ.DOWN ) ) THEN
CALL DGESD2D( CONTXT, 1, LIHIH-LILOH+1,
$ A( ( LILOH-1 )*LDA+ITMP1 ),
$ LDA, DOWN, MYCOL )
CALL DGERV2D( CONTXT, 1, LIHIH-LILOH+1,
$ A( ( LILOH-1 )*LDA+ITMP1 ),
$ LDA, DOWN, MYCOL )
END IF
END IF
*
* Apply G from the right to transform the columns of the
* matrix in rows I1 to MIN(K+3,I).
*
CALL INFOG1L( I1, HBL, NPROW, MYROW, 0, LILOH,
$ LIHIH )
LIHIH = NUMROC( I, HBL, MYROW, 0, NPROW )
*
IF( ICURCOL( KI ).EQ.MYCOL ) THEN
* LOCAL A(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)
IF( ( ISPEC.EQ.0 ) .OR. ( NPCOL.EQ.1 ) .OR.
$ ( MOD( K-1, HBL ).EQ.HBL-2 ) ) THEN
CALL INFOG1L( K, HBL, NPCOL, MYCOL, 0, ITMP1,
$ ITMP2 )
ITMP2 = NUMROC( K+1, HBL, MYCOL, 0, NPCOL )
DO 360 J = LILOH, LIHIH
SUM = A( ( ITMP1-1 )*LDA+J ) +
$ V2*A( ITMP1*LDA+J )
A( ( ITMP1-1 )*LDA+J ) = A( ( ITMP1-1 )*
$ LDA+J ) - SUM*T1
A( ITMP1*LDA+J ) = A( ITMP1*LDA+J ) -
$ SUM*T2
360 CONTINUE
ELSE
ITMP1 = LOCALK2( KI )
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
CALL DGERV2D( CONTXT, LIHIH-LILOH+1, 1,
$ WORK( ICBUF+1 ),
$ LIHIH-LILOH+1, MYROW, LEFT )
DO 370 J = LILOH, LIHIH
SUM = WORK( ICBUF+J ) +
$ V2*A( ( ITMP1-1 )*LDA+J )
WORK( ICBUF+J ) = WORK( ICBUF+J ) -
$ SUM*T1
A( ( ITMP1-1 )*LDA+J )
$ = A( ( ITMP1-1 )*LDA+J ) - SUM*T2
370 CONTINUE
CALL DGESD2D( CONTXT, LIHIH-LILOH+1, 1,
$ WORK( ICBUF+1 ),
$ LIHIH-LILOH+1, MYROW, LEFT )
END IF
END IF
ELSE
IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
$ ( ICURCOL( KI ).EQ.RIGHT ) ) THEN
ITMP1 = KCOL( KI )
CALL DGESD2D( CONTXT, LIHIH-LILOH+1, 1,
$ A( ( ITMP1-1 )*LDA+LILOH ),
$ LDA, MYROW, RIGHT )
CALL INFOG1L( K, HBL, NPCOL, MYCOL, 0, ITMP1,
$ ITMP2 )
ITMP2 = NUMROC( K+1, HBL, MYCOL, 0, NPCOL )
CALL DGERV2D( CONTXT, LIHIH-LILOH+1, 1,
$ A( ( ITMP1-1 )*LDA+LILOH ),
$ LDA, MYROW, RIGHT )
END IF
END IF
*
IF( WANTZ ) THEN
*
* Accumulate transformations in the matrix Z
*
IF( ICURCOL( KI ).EQ.MYCOL ) THEN
* LOCAL Z(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)
IF( ( ISPEC.EQ.0 ) .OR. ( NPCOL.EQ.1 ) .OR.
$ ( MOD( K-1, HBL ).EQ.HBL-2 ) ) THEN
ITMP1 = KCOL( KI ) + K - ISTART
ITMP1 = ( ITMP1-1 )*LDZ
DO 380 J = LILOZ, LIHIZ
SUM = Z( J+ITMP1 ) +
$ V2*Z( J+ITMP1+LDZ )
Z( J+ITMP1 ) = Z( J+ITMP1 ) - SUM*T1
Z( J+ITMP1+LDZ ) = Z( J+ITMP1+LDZ ) -
$ SUM*T2
380 CONTINUE
LOCALK2( KI ) = LOCALK2( KI ) + 1
ELSE
ITMP1 = LOCALK2( KI )
* IF WE ACTUALLY OWN COLUMN K
IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
CALL DGERV2D( CONTXT, LIHIZ-LILOZ+1, 1,
$ WORK( ICBUF+1 ), LDZ,
$ MYROW, LEFT )
ITMP1 = ( ITMP1-1 )*LDZ
DO 390 J = LILOZ, LIHIZ
SUM = WORK( ICBUF+J ) +
$ V2*Z( J+ITMP1 )
WORK( ICBUF+J ) = WORK( ICBUF+J ) -
$ SUM*T1
Z( J+ITMP1 ) = Z( J+ITMP1 ) - SUM*T2
390 CONTINUE
CALL DGESD2D( CONTXT, LIHIZ-LILOZ+1, 1,
$ WORK( ICBUF+1 ), LDZ,
$ MYROW, LEFT )
LOCALK2( KI ) = LOCALK2( KI ) + 1
END IF
END IF
ELSE
*
* NO WORK BUT NEED TO UPDATE ANYWAY????
*
IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
$ ( ICURCOL( KI ).EQ.RIGHT ) ) THEN
ITMP1 = KCOL( KI )
ITMP1 = ( ITMP1-1 )*LDZ
CALL DGESD2D( CONTXT, LIHIZ-LILOZ+1, 1,
$ Z( LILOZ+ITMP1 ), LDZ,
$ MYROW, RIGHT )
CALL DGERV2D( CONTXT, LIHIZ-LILOZ+1, 1,
$ Z( LILOZ+ITMP1 ), LDZ,
$ MYROW, RIGHT )
LOCALK2( KI ) = LOCALK2( KI ) + 1
END IF
END IF
END IF
END IF
400 CONTINUE
*
* Adjust local information for this bulge
*
IF( NPROW.EQ.1 ) THEN
KROW( KI ) = KROW( KI ) + K2( KI ) - K1( KI ) + 1
KP2ROW( KI ) = KP2ROW( KI ) + K2( KI ) - K1( KI ) + 1
END IF
IF( ( MOD( K1( KI )-1, HBL ).LT.HBL-2 ) .AND.
$ ( ICURROW( KI ).EQ.MYROW ) .AND. ( NPROW.GT.1 ) )
$ THEN
KROW( KI ) = KROW( KI ) + K2( KI ) - K1( KI ) + 1
END IF
IF( ( MOD( K2( KI ), HBL ).LT.HBL-2 ) .AND.
$ ( ICURROW( KI ).EQ.MYROW ) .AND. ( NPROW.GT.1 ) )
$ THEN
KP2ROW( KI ) = KP2ROW( KI ) + K2( KI ) - K1( KI ) + 1
END IF
IF( ( MOD( K1( KI )-1, HBL ).GE.HBL-2 ) .AND.
$ ( ( MYROW.EQ.ICURROW( KI ) ) .OR. ( DOWN.EQ.
$ ICURROW( KI ) ) ) .AND. ( NPROW.GT.1 ) ) THEN
CALL INFOG1L( K2( KI )+1, HBL, NPROW, MYROW, 0,
$ KROW( KI ), ITMP2 )
ITMP2 = NUMROC( N, HBL, MYROW, 0, NPROW )
END IF
IF( ( MOD( K2( KI ), HBL ).GE.HBL-2 ) .AND.
$ ( ( MYROW.EQ.ICURROW( KI ) ) .OR. ( UP.EQ.
$ ICURROW( KI ) ) ) .AND. ( NPROW.GT.1 ) ) THEN
CALL INFOG1L( 1, HBL, NPROW, MYROW, 0, ITMP2,
$ KP2ROW( KI ) )
KP2ROW( KI ) = NUMROC( K2( KI )+3, HBL, MYROW, 0,
$ NPROW )
END IF
IF( NPCOL.EQ.1 ) THEN
KCOL( KI ) = KCOL( KI ) + K2( KI ) - K1( KI ) + 1
KP2COL( KI ) = KP2COL( KI ) + K2( KI ) - K1( KI ) + 1
END IF
IF( ( MOD( K1( KI )-1, HBL ).LT.HBL-2 ) .AND.
$ ( ICURCOL( KI ).EQ.MYCOL ) .AND. ( NPCOL.GT.1 ) )
$ THEN
KCOL( KI ) = KCOL( KI ) + K2( KI ) - K1( KI ) + 1
END IF
IF( ( MOD( K2( KI ), HBL ).LT.HBL-2 ) .AND.
$ ( ICURCOL( KI ).EQ.MYCOL ) .AND. ( NPCOL.GT.1 ) )
$ THEN
KP2COL( KI ) = KP2COL( KI ) + K2( KI ) - K1( KI ) + 1
END IF
IF( ( MOD( K1( KI )-1, HBL ).GE.HBL-2 ) .AND.
$ ( ( MYCOL.EQ.ICURCOL( KI ) ) .OR. ( RIGHT.EQ.
$ ICURCOL( KI ) ) ) .AND. ( NPCOL.GT.1 ) ) THEN
CALL INFOG1L( K2( KI )+1, HBL, NPCOL, MYCOL, 0,
$ KCOL( KI ), ITMP2 )
ITMP2 = NUMROC( N, HBL, MYCOL, 0, NPCOL )
END IF
IF( ( MOD( K2( KI ), HBL ).GE.HBL-2 ) .AND.
$ ( ( MYCOL.EQ.ICURCOL( KI ) ) .OR. ( LEFT.EQ.
$ ICURCOL( KI ) ) ) .AND. ( NPCOL.GT.1 ) ) THEN
CALL INFOG1L( 1, HBL, NPCOL, MYCOL, 0, ITMP2,
$ KP2COL( KI ) )
KP2COL( KI ) = NUMROC( K2( KI )+3, HBL, MYCOL, 0,
$ NPCOL )
END IF
K1( KI ) = K2( KI ) + 1
ISTOP = MIN( K1( KI )+ROTN-MOD( K1( KI ), ROTN ), I-2 )
ISTOP = MIN( ISTOP, K1( KI )+HBL-3-
$ MOD( K1( KI )-1, HBL ) )
ISTOP = MIN( ISTOP, I2-2 )
ISTOP = MAX( ISTOP, K1( KI ) )
* ISTOP = MIN( ISTOP , I-1 )
K2( KI ) = ISTOP
IF( K1( KI ).EQ.ISTOP ) THEN
IF( ( MOD( ISTOP-1, HBL ).EQ.HBL-2 ) .AND.
$ ( I-ISTOP.GT.1 ) ) THEN
*
* Next step switches rows & cols
*
ICURROW( KI ) = MOD( ICURROW( KI )+1, NPROW )
ICURCOL( KI ) = MOD( ICURCOL( KI )+1, NPCOL )
END IF
END IF
410 CONTINUE
IF( K2( IBULGE ).LE.I-1 )
$ GO TO 40
END IF
*
420 CONTINUE
*
* Failure to converge in remaining number of iterations
*
INFO = I
RETURN
*
430 CONTINUE
*
IF( L.EQ.I ) THEN
*
* H(I,I-1) is negligible: one eigenvalue has converged.
*
CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW,
$ ICOL, ITMP1, ITMP2 )
IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
WR( I ) = A( ( ICOL-1 )*LDA+IROW )
ELSE
WR( I ) = ZERO
END IF
WI( I ) = ZERO
ELSE IF( L.EQ.I-1 ) THEN
*
* H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
*
WR( I-1 ) = ZERO
WR( I ) = ZERO
WI( I-1 ) = ZERO
WI( I ) = ZERO
MODKM1 = MOD( I-1+HBL, HBL )
CALL INFOG2L( I-1, I-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
$ IROW1, ICOL1, II, JJ )
IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
H11 = A( ( ICOL1-1 )*LDA+IROW1 )
IF( MODKM1.NE.0 ) THEN
H21 = A( ( ICOL1-1 )*LDA+IROW1+1 )
H12 = A( ICOL1*LDA+IROW1 )
H22 = A( ICOL1*LDA+IROW1+1 )
ELSE
IF( NPROW.GT.1 ) THEN
CALL DGERV2D( CONTXT, 1, 1, H21, 1, DOWN, MYCOL )
ELSE
H21 = A( ( ICOL1-1 )*LDA+IROW1+1 )
END IF
IF( NPCOL.GT.1 ) THEN
CALL DGERV2D( CONTXT, 1, 1, H12, 1, MYROW, RIGHT )
ELSE
H12 = A( ICOL1*LDA+IROW1 )
END IF
IF( NUM.GT.1 ) THEN
CALL DGERV2D( CONTXT, 1, 1, H22, 1, DOWN, RIGHT )
ELSE
H22 = A( ICOL1*LDA+IROW1+1 )
END IF
END IF
H00 = HALF*( H11+H22 )
H10 = H11*H22 - H12*H21
ELSE
IF( MODKM1.EQ.0 ) THEN
IF( ( NPROW.GT.1 ) .AND. ( MYCOL.EQ.JJ ) .AND.
$ ( UP.EQ.II ) ) THEN
CALL INFOG2L( I, I-1, DESCA, NPROW, NPCOL, MYROW,
$ MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
CALL DGESD2D( CONTXT, 1, 1,
$ A( ( ICOL1-1 )*LDA+IROW1 ), 1, II, JJ )
END IF
IF( ( NPCOL.GT.1 ) .AND. ( LEFT.EQ.JJ ) .AND.
$ ( MYROW.EQ.II ) ) THEN
CALL INFOG2L( I-1, I, DESCA, NPROW, NPCOL, MYROW,
$ MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
CALL DGESD2D( CONTXT, 1, 1,
$ A( ( ICOL1-1 )*LDA+IROW1 ), 1, II, JJ )
END IF
IF( ( NUM.GT.1 ) .AND. ( LEFT.EQ.JJ ) .AND.
$ ( UP.EQ.II ) ) THEN
CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL,
$ IROW1, ICOL1, ITMP1, ITMP2 )
CALL DGESD2D( CONTXT, 1, 1,
$ A( ( ICOL1-1 )*LDA+IROW1 ), 1, II, JJ )
END IF
END IF
H00 = ZERO
H10 = ZERO
END IF
H21 = H00*H00 - H10
IF( H21.GE.ZERO ) THEN
H21 = SQRT( H21 )
WR( I-1 ) = H00 + H21
WI( I-1 ) = ZERO
WR( I ) = H00 - H21
WI( I ) = ZERO
ELSE
H21 = SQRT( ABS( H21 ) )
WR( I-1 ) = H00
WI( I-1 ) = H21
WR( I ) = H00
WI( I ) = -H21
END IF
ELSE
*
* Find the eigenvalues in H(L:I,L:I), L < I-1
*
JBLK = I - L + 1
IF( JBLK.LE.2*IBLK ) THEN
CALL PDLACP3( I-L+1, L, A, DESCA, S1, 2*IBLK, 0, 0, 0 )
CALL DLAHQR( .FALSE., .FALSE., JBLK, 1, JBLK, S1, 2*IBLK,
$ WR( L ), WI( L ), 1, JBLK, Z, LDZ, IERR )
IF( NODE.NE.0 ) THEN
*
* Erase the eigenvalues
*
DO 440 K = L, I
WR( K ) = ZERO
WI( K ) = ZERO
440 CONTINUE
END IF
END IF
END IF
*
* Decrement number of remaining iterations, and return to start of
* the main loop with new value of I.
*
ITN = ITN - ITS
IF( M.EQ.L-10 ) THEN
I = L - 1
ELSE
I = M
END IF
* I = L - 1
GO TO 10
*
450 CONTINUE
CALL DGSUM2D( CONTXT, 'All', ' ', N, 1, WR, N, -1, -1 )
CALL DGSUM2D( CONTXT, 'All', ' ', N, 1, WI, N, -1, -1 )
RETURN
*
* END OF PDLAHQR
*
END
|